diff --git a/tiled/adapters/arrow.py b/tiled/adapters/arrow.py index 5c6e5776d..d391c0b74 100644 --- a/tiled/adapters/arrow.py +++ b/tiled/adapters/arrow.py @@ -1,7 +1,6 @@ import copy from pathlib import Path from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union -from typing import Callable, Dict, Iterator, List, Optional, Tuple, Union from urllib.parse import quote_plus import pandas diff --git a/tiled/adapters/awkward_buffers.py b/tiled/adapters/awkward_buffers.py index 87296c133..b51fbb747 100644 --- a/tiled/adapters/awkward_buffers.py +++ b/tiled/adapters/awkward_buffers.py @@ -14,7 +14,6 @@ from ..structures.awkward import AwkwardStructure from ..structures.core import Spec, StructureFamily from ..structures.data_source import Asset, DataSource, Storage - from ..type_aliases import JSON from ..utils import path_from_uri from .awkward import AwkwardAdapter diff --git a/tiled/adapters/sql.py b/tiled/adapters/sql.py index a0a97a653..b046092a5 100644 --- a/tiled/adapters/sql.py +++ b/tiled/adapters/sql.py @@ -94,16 +94,25 @@ def init_storage( data_source.structure.arrow_schema_decoded ) # based on hash of Arrow schema encoded = schema.serialize() + default_table_name = "table_" + hashlib.md5(encoded).hexdigest() - data_source.parameters.setdefault("table_name", default_table_name) + table_name = data_source.parameters.setdefault("table_name", default_table_name) + data_source.parameters["dataset_id"] = secrets.randbits(63) data_uri = storage.get("sql") # TODO scrub credential schema_new = schema.insert(0, pyarrow.field("dataset_id", pyarrow.int64())) - statement = schema_to_pg_create_table(schema_new, default_table_name) + create_table_statement = schema_to_pg_create_table(schema_new, table_name) + + create_index_statement = f"""create + index if not exists + dataset_id_index + on + {table_name}(dataset_id);""" conn = create_connection(data_uri) - conn.cursor().execute(statement) + conn.cursor().execute(create_table_statement) + conn.cursor().execute(create_index_statement) conn.commit() data_source.assets.append( @@ -232,8 +241,8 @@ def read_partition( raise NotImplementedError return self.read(fields) +def create_connection(uri: str): -def create_connection(uri): if uri.startswith("sqlite:"): # Ensure this path is writable to avoid a confusing error message # from abdc_driver_sqlite. diff --git a/tiled/structures/table.py b/tiled/structures/table.py index 7052355cb..f6890b5e6 100644 --- a/tiled/structures/table.py +++ b/tiled/structures/table.py @@ -2,6 +2,7 @@ import io from dataclasses import dataclass from typing import List, Tuple, Union + import pyarrow B64_ENCODED_PREFIX = "data:application/vnd.apache.arrow.file;base64," @@ -64,9 +65,9 @@ def from_arrays(cls, arr, names): schema_b64 = base64.b64encode(schema_bytes).decode("utf-8") data_uri = B64_ENCODED_PREFIX + schema_b64 return cls(arrow_schema=data_uri, npartitions=1, columns=list(names)) + @classmethod def from_schema(cls, schema: pyarrow.Schema, npartitions: int = 1): - schema_bytes = schema.serialize() schema_b64 = base64.b64encode(schema_bytes).decode("utf-8") data_uri = B64_ENCODED_PREFIX + schema_b64