Skip to content

Commit

Permalink
SparseDataset -> CSCDataset, CSRDataset
Browse files: browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 7, 2024
1 parent 03d6c00 commit ce2375d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsoma/io/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
import numpy as np
import pandas as pd
import scipy.sparse as sp
from anndata._core.sparse_dataset import SparseDataset
from anndata.abc import CSCDataset, CSRDataset

from tiledbsoma._types import Metadatum, NPNDArray

SparseMatrix = Union[sp.csr_matrix, sp.csc_matrix, SparseDataset]
SparseMatrix = Union[sp.csr_matrix, sp.csc_matrix, CSCDataset, CSRDataset]
DenseMatrix = Union[NPNDArray, h5py.Dataset]
Matrix = Union[DenseMatrix, SparseMatrix]

Expand Down
19 changes: 10 additions & 9 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
import pandas as pd
import pyarrow as pa
import scipy.sparse as sp
from anndata._core.sparse_dataset import SparseDataset
from anndata.abc import CSCDataset
from somacore.options import PlatformConfig
from typing_extensions import get_args

Expand Down Expand Up @@ -1296,7 +1296,7 @@ def _create_from_matrix(
"""
Internal helper for user-facing ``create_from_matrix``.
"""
# SparseDataset has no ndim but it has a shape
# Older SparseDataset has no ndim but it has a shape
if len(matrix.shape) != 2:
raise ValueError(f"expected matrix.shape == 2; got {matrix.shape}")

Expand Down Expand Up @@ -1604,7 +1604,7 @@ def update_matrix(
soma_ndarray: a ``SparseNDArray`` or ``DenseNDArray`` already opened for write.
new_data: If the ``soma_ndarray`` is sparse, a Scipy CSR/CSC matrix or
AnnData ``SparseDataset``. If the ``soma_ndarray`` is dense,
AnnData ``CSCDataset`` / ``CSRDataset``. If the ``soma_ndarray`` is dense,
a NumPy NDArray.
context: Optional :class:`SOMATileDBContext` containing storage parameters, etc.
Expand Down Expand Up @@ -2036,11 +2036,12 @@ def _find_sparse_chunk_size_backed(
these nnz values is quick. This happens when the input is AnnData via
anndata.read_h5ad(name_of_h5ad) without the second backing-mode argument.
* If the input matrix is anndata._core.sparse_dataset.SparseDataset -- which
happens with out-of-core anndata reads -- then getting all these nnz
values is prohibitively expensive. This happens when the input is AnnData
via anndata.read_h5ad(name_of_h5ad, "r") with the second backing-mode
argument, which is necessary for being able to ingest larger H5AD files.
* If the input matrix is ``anndata.abc.CSCDataset`` or
``anndata.abc.CSRDataset`` -- which happens with out-of-core anndata reads
-- then getting all these nnz values is prohibitively expensive. This
happens when the input is AnnData via anndata.read_h5ad(name_of_h5ad, "r")
with the second backing-mode argument, which is necessary for being able
to ingest larger H5AD files.
Say there are 100,000 rows, each with possibly quite different nnz values.
Then in the non-backed case we simply check each row's nnz value. But for
Expand Down Expand Up @@ -2244,7 +2245,7 @@ def _coo_to_table(
if sp.isspmatrix_csc(matrix):
# E.g. if we used anndata.X[:]
stride_axis = 1
if isinstance(matrix, SparseDataset) and matrix.format_str == "csc":
if isinstance(matrix, CSCDataset) and matrix.format_str == "csc":
# E.g. if we used anndata.X without the [:]
stride_axis = 1

Expand Down

0 comments on commit ce2375d

Please sign in to comment.