Skip to content

Commit

Permalink
[python/c++] Remove tiledb-py from unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv committed Oct 11, 2024
1 parent 7fd49a1 commit 2834796
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 207 deletions.
1 change: 1 addition & 0 deletions apis/python/src/tiledbsoma/soma_group.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ void load_soma_group(py::module& m) {
[](SOMAGroup& group) -> bool { return not group.is_open(); })
.def_property_readonly("uri", &SOMAGroup::uri)
.def("context", &SOMAGroup::ctx)
.def("is_relative", &SOMAGroup::is_relative)
.def("has", &SOMAGroup::has)
.def(
"add",
Expand Down
153 changes: 67 additions & 86 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,6 @@
from tiledbsoma._soma_object import SOMAObject
from tiledbsoma.io._common import _TILEDBSOMA_TYPE, UnsDict, UnsMapping

try:
import tiledb

hastiledb = True
except ModuleNotFoundError:
hastiledb = False


from ._util import TESTDATA, assert_adata_equal, make_pd_df


Expand Down Expand Up @@ -272,7 +264,8 @@ def test_named_X_layers(conftest_pbmc_small_h5ad_path, X_layer_name):


def _get_fragment_count(array_uri):
return len(tiledb.fragment.FragmentInfoList(array_uri=array_uri))
fragment_uri = Path(array_uri) / "__fragments"
return len(list(fragment_uri.iterdir())) if fragment_uri.exists() else 0


@pytest.mark.parametrize(
Expand All @@ -283,7 +276,6 @@ def _get_fragment_count(array_uri):
TESTDATA / "pbmc-small-x-csc.h5ad",
],
)
@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed")
def test_resume_mode(resume_mode_h5ad_file):
"""
Makes sure resume-mode ingest after successful ingest of the same input data does not write
Expand Down Expand Up @@ -342,7 +334,6 @@ def test_resume_mode(resume_mode_h5ad_file):


@pytest.mark.parametrize("use_relative_uri", [False, True, None])
@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed")
def test_ingest_relative(conftest_pbmc3k_h5ad_path, use_relative_uri):
tempdir = tempfile.TemporaryDirectory()
output_path = tempdir.name
Expand All @@ -362,37 +353,30 @@ def test_ingest_relative(conftest_pbmc3k_h5ad_path, use_relative_uri):
if use_relative_uri is None:
expected_relative = True # since local disk

exp = tiledbsoma.open(output_path)
with tiledb.Group(exp.uri) as G:
assert G.is_relative("obs") == expected_relative
assert G.is_relative("ms") == expected_relative

with tiledb.Group(exp.ms.uri) as G:
assert G.is_relative("RNA") == expected_relative
with tiledb.Group(exp.ms["RNA"].uri) as G:
assert G.is_relative("var") == expected_relative
assert G.is_relative("X") == expected_relative
with tiledb.Group(exp.ms["RNA"].X.uri) as G:
assert G.is_relative("data") == expected_relative

for collection_name in [
"obsm",
"obsp",
"varm",
]: # conftest_h5ad_file_extended has no varp
with tiledb.Group(exp.ms["RNA"][collection_name].uri) as G:
for member in G:
assert G.is_relative(member.name) == expected_relative

with tiledb.Group(exp.ms.uri) as G:
assert G.is_relative("raw") == expected_relative
with tiledb.Group(exp.ms["raw"].uri) as G:
assert G.is_relative("var") == expected_relative
assert G.is_relative("X") == expected_relative
with tiledb.Group(exp.ms["raw"].X.uri) as G:
assert G.is_relative("data") == expected_relative

exp.close()
with tiledbsoma.Experiment.open(output_path) as G:
assert G._handle._handle.is_relative("obs") == expected_relative
assert G._handle._handle.is_relative("ms") == expected_relative

assert G.ms._handle._handle.is_relative("RNA") == expected_relative
assert G.ms["RNA"]._handle._handle.is_relative("var") == expected_relative
assert G.ms["RNA"]._handle._handle.is_relative("X") == expected_relative
assert G.ms["RNA"].X._handle._handle.is_relative("data") == expected_relative

for collection_name in [
"obsm",
"obsp",
"varm",
]: # conftest_h5ad_file_extended has no varp
for member in G.ms["RNA"][collection_name]:
assert (
G.ms["RNA"][collection_name]._handle._handle.is_relative(member)
== expected_relative
)

assert G.ms._handle._handle.is_relative("raw") == expected_relative
assert G.ms["raw"]._handle._handle.is_relative("var") == expected_relative
assert G.ms["raw"]._handle._handle.is_relative("X") == expected_relative
assert G.ms["raw"].X._handle._handle.is_relative("data") == expected_relative


@pytest.mark.parametrize("ingest_uns_keys", [["louvain_colors"], None])
Expand Down Expand Up @@ -745,7 +729,6 @@ def check(tdbo: SOMAObject):
check(raw.X)


@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed")
def test_null_obs(conftest_pbmc_small, tmp_path: Path):
output_path = tmp_path.as_uri()
seed = 42
Expand Down Expand Up @@ -777,25 +760,24 @@ def test_null_obs(conftest_pbmc_small, tmp_path: Path):
)
assert_adata_equal(original, conftest_pbmc_small)

exp = tiledbsoma.Experiment.open(uri)
with tiledb.open(exp.obs.uri, "r") as obs:
# Explicitly check columns created above
assert obs.attr("empty_categorical_all").isnullable
assert obs.attr("empty_categorical_partial").isnullable
assert obs.attr("empty_extension_all").isnullable
assert obs.attr("empty_extension_partial").isnullable
# For every column in the data frame
# ensure that `isnullable` reflects the null-ness
# of the Pandas data frame
with tiledbsoma.Experiment.open(uri) as exp:
schema = exp.obs.schema.field

# Explicitly check columns created above
assert schema("empty_categorical_all").nullable
assert schema("empty_categorical_partial").nullable
assert schema("empty_extension_all").nullable
assert schema("empty_extension_partial").nullable

# For every column in the data frame ensure that `nullable` reflects
# the null-ness of the Pandas data frame
for k in conftest_pbmc_small.obs:
assert obs.attr(k).isnullable
assert schema(k).nullable


@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed")
def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):
adata = anndata.read_h5ad(h5ad_file_with_obsm_holes.as_posix())
original = adata.copy()
assert 1 == 1

# This data file is prepared such that obsm["X_pca"] has shape (2638, 50)
# but its [0][0] element is a 0, so when it's stored as sparse, its nnz
Expand All @@ -808,48 +790,47 @@ def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):

assert_adata_equal(original, adata)

exp = tiledbsoma.Experiment.open(output_path)

# Verify the bounding box on the SOMA SparseNDArray
with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri) as so:
assert so.meta["soma_dim_0_domain_lower"] == 0
assert so.meta["soma_dim_0_domain_upper"] == 2637
assert so.meta["soma_dim_1_domain_lower"] == 0
assert so.meta["soma_dim_1_domain_upper"] == 49
with tiledbsoma.Experiment.open(output_path) as exp:
meta = exp.ms["RNA"].obsm["X_pca"].metadata
assert meta["soma_dim_0_domain_lower"] == 0
assert meta["soma_dim_0_domain_upper"] == 2637
assert meta["soma_dim_1_domain_lower"] == 0
assert meta["soma_dim_1_domain_upper"] == 49

# With the bounding box present, all is well for outgest to AnnData format.
try1 = tiledbsoma.io.to_anndata(exp, "RNA")
assert try1.obsm["X_pca"].shape == (2638, 50)
# With the bounding box present, all is well for outgest to AnnData format.
try1 = tiledbsoma.io.to_anndata(exp, "RNA")
assert try1.obsm["X_pca"].shape == (2638, 50)

# Now remove the bounding box to simulate reading older data that lacks a bounding box.
with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri, "w") as so:
del so.meta["soma_dim_0_domain_lower"]
del so.meta["soma_dim_0_domain_upper"]
del so.meta["soma_dim_1_domain_lower"]
del so.meta["soma_dim_1_domain_upper"]
with tiledbsoma.Experiment.open(output_path, "w") as exp:
meta = exp.ms["RNA"].obsm["X_pca"].metadata
del meta["soma_dim_0_domain_lower"]
del meta["soma_dim_0_domain_upper"]
del meta["soma_dim_1_domain_lower"]
del meta["soma_dim_1_domain_upper"]

# Re-open to simulate opening afresh a bounding-box-free array.
exp = tiledbsoma.Experiment.open(output_path)

with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri) as so:
with tiledbsoma.Experiment.open(output_path) as exp:
meta = exp.ms["RNA"].obsm["X_pca"].metadata
with pytest.raises(KeyError):
so.meta["soma_dim_0_domain_lower"]
meta["soma_dim_0_domain_lower"]
with pytest.raises(KeyError):
so.meta["soma_dim_0_domain_upper"]
meta["soma_dim_0_domain_upper"]
with pytest.raises(KeyError):
so.meta["soma_dim_1_domain_lower"]
meta["soma_dim_1_domain_lower"]
with pytest.raises(KeyError):
so.meta["soma_dim_1_domain_upper"]
assert so.meta["soma_object_type"] == "SOMASparseNDArray"
meta["soma_dim_1_domain_upper"]
assert meta["soma_object_type"] == "SOMASparseNDArray"

# Now try the remaining options for outgest.
with pytest.raises(tiledbsoma.SOMAError):
tiledbsoma.io.to_anndata(exp, "RNA")
# Now try the remaining options for outgest.
with pytest.raises(tiledbsoma.SOMAError):
tiledbsoma.io.to_anndata(exp, "RNA")

try3 = tiledbsoma.io.to_anndata(
exp, "RNA", obsm_varm_width_hints={"obsm": {"X_pca": 50}}
)
assert try3.obsm["X_pca"].shape == (2638, 50)
try3 = tiledbsoma.io.to_anndata(
exp, "RNA", obsm_varm_width_hints={"obsm": {"X_pca": 50}}
)
assert try3.obsm["X_pca"].shape == (2638, 50)


def test_X_empty(h5ad_file_X_empty):
Expand Down
28 changes: 0 additions & 28 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@

import tiledbsoma as soma

try:
import tiledb

hastiledb = True
except ModuleNotFoundError:
hastiledb = False


from tests._util import raises_no_typeguard


Expand Down Expand Up @@ -1602,26 +1594,6 @@ def test_enum_schema_report(tmp_path):
arrow_table = pa.Table.from_pandas(pandas_df, preserve_index=False)
sdf.write(arrow_table)

# Double-check against TileDB-Py reporting
if hastiledb:
with tiledb.open(uri) as A:
for i in range(A.schema.nattr):
attr = A.schema.attr(i)
try:
index_type = attr.dtype
value_type = A.enum(attr.name).dtype
except tiledb.cc.TileDBError:
pass # not an enum attr
if attr.name == "int_cat":
assert index_type.name == "int8"
assert value_type.name == "int64"
elif attr.name == "str_cat":
assert index_type.name == "int8"
assert value_type.name == "str32"
elif attr.name == "byte_cat":
assert index_type.name == "int8"
assert value_type.name == "bytes8"

# Verify SOMA Arrow schema
with soma.open(uri) as sdf:
f = sdf.schema.field("int_cat")
Expand Down
Loading

0 comments on commit 2834796

Please sign in to comment.