Skip to content

Commit

Permalink
[python] [NO MERGE UNTIL 1.15] Remove tiledb-py dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv committed Oct 15, 2024
1 parent 2942ea2 commit 7af640c
Show file tree
Hide file tree
Showing 12 changed files with 180 additions and 207 deletions.
1 change: 1 addition & 0 deletions apis/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def run(self):
"src/tiledbsoma/common.cc",
"src/tiledbsoma/reindexer.cc",
"src/tiledbsoma/query_condition.cc",
"src/tiledbsoma/soma_vfs.cc",
"src/tiledbsoma/soma_context.cc",
"src/tiledbsoma/soma_array.cc",
"src/tiledbsoma/soma_object.cc",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ def from_isolated_h5ad(
experiment, not in append mode, but allowing us to still have the bulk of the ingestor code
to be non-duplicated between non-append mode and append mode.
"""
tiledb_ctx = None if context is None else context.tiledb_ctx
with read_h5ad(h5ad_file_name, mode="r", ctx=tiledb_ctx) as adata:
with read_h5ad(h5ad_file_name, mode="r", ctx=context) as adata:
return cls.from_isolated_anndata(
adata,
measurement_name=measurement_name,
Expand Down Expand Up @@ -434,8 +433,7 @@ def from_h5ad_append_on_experiment(
"""Extends registration data to one more H5AD input file."""
tiledbsoma.logging.logger.info(f"Registration: registering {h5ad_file_name}.")

tiledb_ctx = None if context is None else context.tiledb_ctx
with read_h5ad(h5ad_file_name, mode="r", ctx=tiledb_ctx) as adata:
with read_h5ad(h5ad_file_name, mode="r", ctx=context) as adata:
return cls.from_anndata_append_on_experiment(
adata,
previous,
Expand Down
10 changes: 6 additions & 4 deletions apis/python/src/tiledbsoma/io/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
import pyarrow as pa
from anndata._core import file_backing

import tiledb

from .. import pytiledbsoma as clib
from .._exception import SOMAError
from .._types import Path
from ..options import SOMATileDBContext

_pa_type_to_str_fmt = {
pa.string(): "U",
Expand All @@ -42,12 +42,14 @@

@contextmanager
def read_h5ad(
input_path: Path, *, mode: str = "r", ctx: Optional[tiledb.Ctx] = None
input_path: Path, *, mode: str = "r", ctx: Optional[SOMATileDBContext] = None
) -> Iterator[ad.AnnData]:
"""
This lets us ingest H5AD with "r" (backed mode) from S3 URIs.
"""
input_handle = tiledb.VFS(ctx=ctx).open(input_path)
ctx = ctx or SOMATileDBContext()
vfs = clib.SOMAVFS(ctx.native_context)
input_handle = clib.SOMAVFSFilebuf(vfs).open(str(input_path))
try:
with _hack_patch_anndata():
anndata = ad.read_h5ad(_FSPathWrapper(input_handle, input_path), mode)
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def from_h5ad(

logging.log_io(None, f"START READING {input_path}")

with read_h5ad(input_path, mode="r", ctx=context.tiledb_ctx) as anndata:
with read_h5ad(input_path, mode="r", ctx=context) as anndata:
logging.log_io(None, _util.format_elapsed(s, f"FINISH READING {input_path}"))

uri = from_anndata(
Expand Down
84 changes: 10 additions & 74 deletions apis/python/src/tiledbsoma/options/_soma_tiledb_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,17 @@
import functools
import threading
import time
import warnings
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, Literal, Mapping, Optional, Union

from somacore import ContextBase
from typing_extensions import Self

import tiledb

from .. import pytiledbsoma as clib
from .._types import OpenTimestamp
from .._util import ms_to_datetime, to_timestamp_ms


def _warn_ctx_deprecation() -> None:
pass
# https://github.com/single-cell-data/TileDB-SOMA/issues/3134
# Skipping for 1.15.0rc0
# assert_version_before(1, 15)
warnings.warn(
"tiledb_ctx is now deprecated for removal in 1.15. "
"Use tiledb_config instead by passing "
"SOMATileDBContext(tiledb_config=ctx.config().dict()).",
DeprecationWarning,
stacklevel=3,
)


def _default_config(
override: Mapping[str, Union[str, float]]
) -> Dict[str, Union[str, float]]:
Expand All @@ -53,9 +36,9 @@ def _default_config(


@functools.lru_cache(maxsize=None)
def _default_global_ctx() -> tiledb.Ctx:
"""Lazily builds a default TileDB Context with the default config."""
return tiledb.Ctx(_default_config({}))
def _default_global_native_context() -> clib.SOMAContext:
"""Lazily builds a default SOMAContext with the default config."""
return clib.SOMAContext({k: str(v) for k, v in _default_config({}).items()})


def _maybe_timestamp_ms(input: Optional[OpenTimestamp]) -> Optional[int]:
Expand Down Expand Up @@ -83,7 +66,6 @@ class SOMATileDBContext(ContextBase):

def __init__(
self,
tiledb_ctx: Optional[tiledb.Ctx] = None,
tiledb_config: Optional[Dict[str, Union[str, float]]] = None,
timestamp: Optional[OpenTimestamp] = None,
threadpool: Optional[ThreadPoolExecutor] = None,
Expand Down Expand Up @@ -135,27 +117,12 @@ def __init__(
provided, a new ThreadPoolExecutor will be created with
default settings.
"""
if tiledb_ctx is not None:
_warn_ctx_deprecation()

if tiledb_ctx is not None and tiledb_config is not None:
raise ValueError(
"only one of tiledb_ctx or tiledb_config"
" may be set when constructing a SOMATileDBContext"
)
self._lock = threading.Lock()
"""A lock to ensure single initialization of ``_tiledb_ctx``."""
self._initial_config = (
self._initial_config: Optional[Dict[str, Union[str, float]]] = (
None if tiledb_config is None else _default_config(tiledb_config)
)

"""A dictionary of options to override the default TileDB config.
This includes both the user-provided options and the default options
that we provide to TileDB. If this is unset, then either we were
provided with a TileDB Ctx, or we need to use The Default Global Ctx.
"""
self._tiledb_ctx = tiledb_ctx
"""The TileDB context to use, either provided or lazily constructed."""
self._timestamp_ms = _maybe_timestamp_ms(timestamp)

Expand Down Expand Up @@ -186,25 +153,14 @@ def native_context(self) -> clib.SOMAContext:
"""The C++ SOMAContext for this SOMA context."""
with self._lock:
if self._native_context is None:
cfg = self._internal_tiledb_config()
self._native_context = clib.SOMAContext(
{k: str(v) for k, v in cfg.items()}
)
return self._native_context

@property
def tiledb_ctx(self) -> tiledb.Ctx:
"""The TileDB-Py Context for this SOMA context."""
_warn_ctx_deprecation()

with self._lock:
if self._tiledb_ctx is None:
if self._initial_config is None:
# Special case: we need to use the One Global Default.
self._tiledb_ctx = _default_global_ctx()
self._native_context = _default_global_native_context()
else:
self._tiledb_ctx = tiledb.Ctx(self._initial_config)
return self._tiledb_ctx
cfg = self._internal_tiledb_config()
self._native_context = clib.SOMAContext(
{k: str(v) for k, v in cfg.items()}
)
return self._native_context

@property
def tiledb_config(self) -> Dict[str, Union[str, float]]:
Expand All @@ -230,11 +186,6 @@ def _internal_tiledb_config(self) -> Dict[str, Union[str, float]]:
if self._native_context is not None:
return dict(self._native_context.config())

# We have TileDB Context. Return its actual config.
# TODO This block will be deleted once tiledb_ctx is removed in 1.15
if self._tiledb_ctx is not None:
return dict(self._tiledb_ctx.config())

# Our context has not yet been built.
# We return what will be passed into the context.
return (
Expand All @@ -247,7 +198,6 @@ def replace(
self,
*,
tiledb_config: Optional[Dict[str, Any]] = None,
tiledb_ctx: Optional[tiledb.Ctx] = None,
timestamp: Union[None, OpenTimestamp, _Unset] = _UNSET,
threadpool: Union[None, ThreadPoolExecutor, _Unset] = _UNSET,
) -> Self:
Expand Down Expand Up @@ -279,15 +229,7 @@ def replace(
... tiledb_config={"vfs.s3.region": None})
"""
with self._lock:
if tiledb_ctx is not None:
_warn_ctx_deprecation()

if tiledb_config is not None:
if tiledb_ctx:
raise ValueError(
"Either tiledb_config or tiledb_ctx may be provided"
" to replace(), but not both."
)
new_config = self._internal_tiledb_config()
new_config.update(tiledb_config)
tiledb_config = {k: v for (k, v) in new_config.items() if v is not None}
Expand All @@ -302,7 +244,6 @@ def replace(
assert timestamp is None or isinstance(timestamp, (datetime.datetime, int))
return type(self)(
tiledb_config=tiledb_config,
tiledb_ctx=tiledb_ctx,
timestamp=timestamp,
threadpool=threadpool,
)
Expand All @@ -327,11 +268,6 @@ def _validate_soma_tiledb_context(context: Any) -> SOMATileDBContext:
if context is None:
return SOMATileDBContext()

if isinstance(context, tiledb.Ctx):
raise TypeError(
"context is a tiledb.Ctx, not a SOMATileDBContext -- please wrap it in tiledbsoma.SOMATileDBContext(...)"
)

if not isinstance(context, SOMATileDBContext):
raise TypeError("context is not a SOMATileDBContext")

Expand Down
105 changes: 19 additions & 86 deletions apis/python/src/tiledbsoma/options/_tiledb_create_write_options.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import warnings
from typing import (
Any,
Dict,
Expand All @@ -11,18 +10,13 @@
TypedDict,
TypeVar,
Union,
cast,
)

import attrs as attrs_ # We use the name `attrs` later.
import attrs.validators as vld # Short name because we use this a bunch.
from somacore import options
from typing_extensions import Self

import tiledb

from .._general_utilities import assert_version_before

# Most defaults are configured directly as default attribute values
# within TileDBCreateOptions.
DEFAULT_TILE_EXTENT = 2048
Expand Down Expand Up @@ -192,44 +186,6 @@ def cell_tile_orders(self) -> Tuple[Optional[str], Optional[str]]:
return DEFAULT_CELL_ORDER, DEFAULT_TILE_ORDER
return self.cell_order, self.tile_order

def offsets_filters_tiledb(self) -> Tuple[tiledb.Filter, ...]:
    """Constructs the real TileDB Filters to use for offsets.

    Deprecated: emits a ``DeprecationWarning`` directing callers to
    ``offsets_filters``.

    Returns:
        One filter per entry of ``self.offsets_filters``, in order, each
        produced by ``_build_filter``.
    """
    # NOTE(review): presumably fails once the package version reaches 1.15,
    # forcing removal of this method -- confirm against assert_version_before.
    assert_version_before(1, 15)
    warnings.warn(
        "`offsets_filters_tiledb` is now deprecated for removal in 1.15 "
        "as we no longer support returning tiledb.Filter. "
        "Use `offsets_filters` instead.",
        DeprecationWarning,
        # Attribute the warning to the caller rather than to this method.
        stacklevel=2,
    )

    return tuple(_build_filter(spec) for spec in self.offsets_filters)

def validity_filters_tiledb(self) -> Optional[Tuple[tiledb.Filter, ...]]:
    """Constructs the real TileDB Filters to use for the validity map.

    Deprecated: emits a ``DeprecationWarning`` directing callers to
    ``validity_filters``.

    Returns:
        ``None`` when ``self.validity_filters`` is ``None``; otherwise one
        filter per configured entry, in order, each produced by
        ``_build_filter``.
    """
    # NOTE(review): presumably fails once the package version reaches 1.15,
    # forcing removal of this method -- confirm against assert_version_before.
    assert_version_before(1, 15)
    warnings.warn(
        "`validity_filters_tiledb` is now deprecated for removal in 1.15 "
        "as we no longer support returning tiledb.Filter. "
        "Use `validity_filters` instead.",
        DeprecationWarning,
        # Attribute the warning to the caller rather than to this method.
        stacklevel=2,
    )
    if self.validity_filters is None:
        return None
    return tuple(_build_filter(spec) for spec in self.validity_filters)

def dim_filters_tiledb(
    self, dim: str, default: Sequence[_FilterSpec] = ()
) -> Tuple[tiledb.Filter, ...]:
    """Constructs the real TileDB Filters to use for the named dimension.

    Deprecated: emits a ``DeprecationWarning`` directing callers to ``dims``.

    Args:
        dim: Name of the dimension to look up in ``self.dims``.
        default: Filter specs handed to ``_filters_from`` as the fallback.

    Returns:
        The tuple of filters produced by ``_filters_from`` for ``dim``.
    """
    # NOTE(review): presumably fails once the package version reaches 1.15,
    # forcing removal of this method -- confirm against assert_version_before.
    assert_version_before(1, 15)
    warnings.warn(
        "`dim_filters_tiledb` is now deprecated for removal in 1.15 "
        "as we no longer support returning tiledb.Filter. "
        "Use `dims` instead.",
        DeprecationWarning,
        # Attribute the warning to the caller rather than to this method.
        stacklevel=2,
    )
    return _filters_from(self.dims, dim, default)

def dim_tile(self, dim_name: str, default: int = DEFAULT_TILE_EXTENT) -> int:
"""Returns the tile extent for the given dimension."""
try:
Expand All @@ -238,19 +194,6 @@ def dim_tile(self, dim_name: str, default: int = DEFAULT_TILE_EXTENT) -> int:
return default
return default if dim.tile is None else dim.tile

def attr_filters_tiledb(
    self, name: str, default: Sequence[_FilterSpec] = ()
) -> Tuple[tiledb.Filter, ...]:
    """Constructs the real TileDB Filters to use for the named attribute.

    Deprecated: emits a ``DeprecationWarning`` directing callers to ``attrs``.

    Args:
        name: Name of the attribute to look up in ``self.attrs``.
        default: Filter specs handed to ``_filters_from`` as the fallback.

    Returns:
        The tuple of filters produced by ``_filters_from`` for ``name``.
    """
    # NOTE(review): presumably fails once the package version reaches 1.15,
    # forcing removal of this method -- confirm against assert_version_before.
    assert_version_before(1, 15)
    warnings.warn(
        "`attr_filters_tiledb` is now deprecated for removal in 1.15 "
        "as we no longer support returning tiledb.Filter. "
        "Use `attrs` instead.",
        DeprecationWarning,
        # Attribute the warning to the caller rather than to this method.
        stacklevel=2,
    )
    return _filters_from(self.attrs, name, default)


@attrs_.define(frozen=True, kw_only=True, slots=True)
class TileDBWriteOptions:
Expand Down Expand Up @@ -329,8 +272,25 @@ def _dig_platform_config(
# Filter handling and construction.
#

_FILTERS: Mapping[str, Type[tiledb.Filter]] = {
cls.__name__: cls for cls in tiledb.FilterList.filter_type_cc_to_python.values()
_FILTERS: Mapping[str, str] = {
"GzipFilter": "GZIP",
"ZstdFilter": "ZSTD",
"LZ4Filter": "LZ4",
"Bzip2Filter": "BZIP2",
"RleFilter": "RLE",
"DeltaFilter": "DELTA",
"DoubleDeltaFilter": "DOUBLE_DELTA",
"BitWidthReductionFilter": "BIT_WIDTH_REDUCTION",
"BitShuffleFilter": "BITSHUFFLE",
"ByteShuffleFilter": "BYTESHUFFLE",
"PositiveDeltaFilter": "POSITIVE_DELTA",
"ChecksumMD5Filter": "CHECKSUM_MD5",
"ChecksumSHA256Filter": "CHECKSUM_SHA256",
"DictionaryFilter": "DICTIONARY",
"FloatScaleFilter": "SCALE_FLOAT",
"XORFilter": "XOR",
"WebpFilter": "WEBP",
"NoOpFilter": "NONE",
}


Expand All @@ -355,30 +315,3 @@ def _normalize_filter(input: _FilterSpec) -> _DictFilterSpec:
except KeyError as ke:
raise ValueError(f"filter type {typ_name!r} unknown") from ke
return dict(input)


def _filters_from(
    col_configs: Mapping[str, _ColumnConfig], name: str, default: Sequence[_FilterSpec]
) -> Tuple[tiledb.Filter, ...]:
    """Build the filters for the column called ``name`` in ``col_configs``.

    The column's own ``filters`` are used when the column is configured and
    its ``filters`` value is not ``None``. Otherwise the normalized form of
    ``default`` is used, or an empty tuple when that normalization is falsy.
    Every selected spec is turned into a filter by ``_build_filter``.
    """
    try:
        column = col_configs[name]
    except KeyError:
        # No configuration at all for this column.
        chosen = None
    else:
        chosen = column.filters
    if chosen is None:
        chosen = _normalize_filters(default) or ()
    return tuple(map(_build_filter, chosen))


def _build_filter(item: _DictFilterSpec) -> tiledb.Filter:
    """Instantiate a single filter from its dict spec.

    The spec's ``"_type"`` entry names the filter class, which is looked up
    in ``_FILTERS``. Every remaining entry is forwarded to that class as a
    keyword argument.
    """
    # Work on a copy so that popping "_type" never mutates the caller's spec
    # (and therefore never mutates shared global state).
    # The spec was validated earlier, so no extra checking is done here.
    options = dict(item)
    filter_cls = _FILTERS[cast(str, options.pop("_type"))]
    return filter_cls(**options)
Loading

0 comments on commit 7af640c

Please sign in to comment.