From 2875f7dbb73035089db5962bd227e57a75ed3b9c Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 7 Oct 2024 15:49:08 -0400 Subject: [PATCH] [python] Drop `anndata` version pin; use `CSCDataset`/`CSRDataset` (#3141) * [python] Drop `anndata` version pin * SparseDataset -> CSCDataset, CSRDataset --- apis/python/setup.py | 3 +-- apis/python/src/tiledbsoma/io/_common.py | 4 ++-- apis/python/src/tiledbsoma/io/ingest.py | 19 ++++++++++--------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/apis/python/setup.py b/apis/python/setup.py index ce28d534ed..e1cb167f8e 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -326,8 +326,7 @@ def run(self): zip_safe=False, setup_requires=["pybind11"], install_requires=[ - # Tracked in https://github.com/single-cell-data/TileDB-SOMA/issues/1785 - "anndata != 0.10.0", + "anndata>=0.10.1", "attrs>=22.2", "numba>=0.58.0", "numpy<2.0", diff --git a/apis/python/src/tiledbsoma/io/_common.py b/apis/python/src/tiledbsoma/io/_common.py index 41e0c7053a..8895c20d6a 100644 --- a/apis/python/src/tiledbsoma/io/_common.py +++ b/apis/python/src/tiledbsoma/io/_common.py @@ -10,11 +10,11 @@ import numpy as np import pandas as pd import scipy.sparse as sp -from anndata._core.sparse_dataset import SparseDataset +from anndata.experimental import CSCDataset, CSRDataset from tiledbsoma._types import Metadatum, NPNDArray -SparseMatrix = Union[sp.csr_matrix, sp.csc_matrix, SparseDataset] +SparseMatrix = Union[sp.csr_matrix, sp.csc_matrix, CSCDataset, CSRDataset] DenseMatrix = Union[NPNDArray, h5py.Dataset] Matrix = Union[DenseMatrix, SparseMatrix] diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index e67da5f123..caccd0a699 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -36,7 +36,7 @@ import pandas as pd import pyarrow as pa import scipy.sparse as sp -from anndata._core.sparse_dataset import SparseDataset +from 
anndata.experimental import CSCDataset from somacore.options import PlatformConfig from typing_extensions import get_args @@ -1296,7 +1296,7 @@ def _create_from_matrix( """ Internal helper for user-facing ``create_from_matrix``. """ - # SparseDataset has no ndim but it has a shape + # Older SparseDataset has no ndim but it has a shape if len(matrix.shape) != 2: raise ValueError(f"expected matrix.shape == 2; got {matrix.shape}") @@ -1604,7 +1604,7 @@ def update_matrix( soma_ndarray: a ``SparseNDArray`` or ``DenseNDArray`` already opened for write. new_data: If the ``soma_ndarray`` is sparse, a Scipy CSR/CSC matrix or - AnnData ``SparseDataset``. If the ``soma_ndarray`` is dense, + AnnData ``CSCDataset`` / ``CSRDataset``. If the ``soma_ndarray`` is dense, a NumPy NDArray. context: Optional :class:`SOMATileDBContext` containing storage parameters, etc. @@ -2036,11 +2036,12 @@ def _find_sparse_chunk_size_backed( these nnz values is quick. This happens when the input is AnnData via anndata.read_h5ad(name_of_h5ad) without the second backing-mode argument. - * If the input matrix is anndata._core.sparse_dataset.SparseDataset -- which - happens with out-of-core anndata reads -- then getting all these nnz - values is prohibitively expensive. This happens when the input is AnnData - via anndata.read_h5ad(name_of_h5ad, "r") with the second backing-mode - argument, which is necessary for being able to ingest larger H5AD files. + * If the input matrix is ``anndata.experimental.CSCDataset`` or + ``anndata.experimental.CSRDataset`` -- which happens with out-of-core anndata reads + -- then getting all these nnz values is prohibitively expensive. This + happens when the input is AnnData via anndata.read_h5ad(name_of_h5ad, "r") + with the second backing-mode argument, which is necessary for being able + to ingest larger H5AD files. Say there are 100,000 rows, each with possibly quite different nnz values. Then in the non-backed case we simply check each row's nnz value. 
But for @@ -2244,7 +2245,7 @@ def _coo_to_table( if sp.isspmatrix_csc(matrix): # E.g. if we used anndata.X[:] stride_axis = 1 - if isinstance(matrix, SparseDataset) and matrix.format_str == "csc": + if isinstance(matrix, CSCDataset) and matrix.format_str == "csc": # E.g. if we used anndata.X without the [:] stride_axis = 1