Rename filter_list -> filters
nguyenv committed Oct 11, 2024
1 parent 7d85a1e commit 7fd49a1
Showing 6 changed files with 74 additions and 88 deletions.
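The rename is mechanical but crosses the C++/Python boundary: ArrowAdapter serializes per-attribute and per-dimension schema metadata to JSON, and the Python side reads it back through config_options_from_schema(). A minimal sketch of the before/after payload shape, with illustrative values taken from the tests below:

# Per-attribute metadata as serialized by ArrowAdapter (values illustrative):
before = {"bar": {"filter_list": [{"COMPRESSION_LEVEL": -1, "name": "ZSTD"}]}}
after = {"bar": {"filters": [{"COMPRESSION_LEVEL": -1, "name": "ZSTD"}]}}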
6 changes: 5 additions & 1 deletion apis/python/src/tiledbsoma/soma_array.cc
@@ -946,7 +946,11 @@ void load_soma_array(py::module& m) {

.def_property_readonly("dimension_names", &SOMAArray::dimension_names)

.def("consolidate_and_vacuum", &SOMAArray::consolidate_and_vacuum)
.def(
"consolidate_and_vacuum",
&SOMAArray::consolidate_and_vacuum,
py::arg(
"modes") = std::vector<std::string>{"fragment_meta", "commits"})

.def_property_readonly(
"meta",
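The binding change above gives consolidate_and_vacuum a default modes argument of ["fragment_meta", "commits"]. A hedged usage sketch via the public write path exercised by the tests below; uri is assumed to point at an existing soma.DataFrame whose schema matches the table:

import pyarrow as pa
import tiledbsoma as soma

table = pa.Table.from_pydict({"soma_joinid": [0, 1], "bar": [10.0, 20.0]})
with soma.DataFrame.open(uri, "w") as sdf:
    # consolidate_and_vacuum=True routes through the binding above, which now
    # consolidates fragment metadata and commit logs by default; overriding
    # the mode list happens at the C++ level via the new `modes` argument.
    sdf.write(table, soma.TileDBWriteOptions(consolidate_and_vacuum=True))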
70 changes: 34 additions & 36 deletions apis/python/tests/test_dataframe.py
@@ -1,6 +1,7 @@
import contextlib
import datetime
import os
import json
from pathlib import Path
from typing import Dict, List

import numpy as np
@@ -145,13 +146,15 @@ def test_dataframe(tmp_path, arrow_schema):
assert [e.as_py() for e in table["baz"]] == pydict["baz"]
assert [e.as_py() for e in table["quux"]] == pydict["quux"]

# Validate TileDB array schema
if hastiledb:
with tiledb.open(uri) as A:
assert A.schema.sparse
assert not A.schema.allows_duplicates
assert A.dim("foo").filters == [tiledb.ZstdFilter(level=3)]
assert A.attr("bar").filters == [tiledb.ZstdFilter()]
with soma.DataFrame.open(uri) as A:
cfg = A.config_options_from_schema()
assert not cfg.allows_duplicates
assert json.loads(cfg.dims)["foo"]["filters"] == [
{"COMPRESSION_LEVEL": 3, "name": "ZSTD"}
]
assert json.loads(cfg.attrs)["bar"]["filters"] == [
{"COMPRESSION_LEVEL": -1, "name": "ZSTD"}
]

with soma.DataFrame.open(uri) as sdf:
assert sdf.count == 5
@@ -1054,34 +1057,27 @@ def test_result_order(tmp_path):
next(sdf.read(result_order="bogus"))


@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed")
@pytest.mark.parametrize(
"create_options,expected_schema_fields",
(
(
{"allows_duplicates": True},
{
"validity_filters": (
tiledb.FilterList([tiledb.RleFilter()]) if hastiledb else None
),
"validity_filters": [{"COMPRESSION_LEVEL": -1, "name": "RLE"}],
"allows_duplicates": True,
},
),
(
{"allows_duplicates": False},
{
"validity_filters": (
tiledb.FilterList([tiledb.RleFilter()]) if hastiledb else None
),
"validity_filters": [{"COMPRESSION_LEVEL": -1, "name": "RLE"}],
"allows_duplicates": False,
},
),
(
{"validity_filters": ["NoOpFilter"], "allows_duplicates": False},
{
"validity_filters": (
tiledb.FilterList([tiledb.NoOpFilter()]) if hastiledb else None
),
"validity_filters": [{"name": "NOOP"}],
"allows_duplicates": False,
},
),
@@ -1096,14 +1092,17 @@ def test_create_platform_config_overrides(
schema=pa.schema([pa.field("colA", pa.string())]),
platform_config={"tiledb": {"create": {**create_options}}},
).close()
with tiledb.open(uri) as D:
for k, v in expected_schema_fields.items():
assert getattr(D.schema, k) == v

with soma.DataFrame.open(tmp_path.as_posix()) as A:
cfg = A.config_options_from_schema()
assert expected_schema_fields["validity_filters"] == json.loads(
cfg.validity_filters
)
assert expected_schema_fields["allows_duplicates"] == cfg.allows_duplicates


@pytest.mark.parametrize("allows_duplicates", [False, True])
@pytest.mark.parametrize("consolidate", [False, True])
@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed")
def test_timestamped_ops(tmp_path, allows_duplicates, consolidate):
uri = tmp_path.as_posix()

@@ -1140,21 +1139,21 @@ def test_timestamped_ops(tmp_path, allows_duplicates, consolidate):
"float": [200.2, 300.3],
"string": ["ball", "cat"],
}
sidf.write(pa.Table.from_pydict(data))

# Without consolidate:
# * There are two fragments:
# o One with tiledb.fragment.FragmentInfoList[i].timestamp_range = (10, 10)
# o One with tiledb.fragment.FragmentInfoList[i].timestamp_range = (20, 20)
# With consolidate:
# * There is one fragment:
# o One with tiledb.fragment.FragmentInfoList[i].timestamp_range = (10, 20)
sidf.write(
pa.Table.from_pydict(data),
soma.TileDBWriteOptions(consolidate_and_vacuum=consolidate),
)
assert sidf.tiledb_timestamp_ms == 1615403005000
assert sidf.tiledb_timestamp.isoformat() == "2021-03-10T19:03:25+00:00"

# Without consolidate:
# * There are two fragments:
# o One with tiledb.fragment.FragmentInfoList[i].timestamp_range = (10, 10)
# o One with tiledb.fragment.FragmentInfoList[i].timestamp_range = (20, 20)
# With consolidate:
# * There is one fragment:
# o One with tiledb.fragment.FragmentInfoList[i].timestamp_range = (10, 20)
if consolidate:
tiledb.consolidate(uri)
tiledb.vacuum(uri)

# read without timestamp (i.e., after final write) & see final image
with soma.DataFrame.open(uri) as sidf:
table = sidf.read().concat()
@@ -1735,10 +1734,9 @@ def test_only_evolve_schema_when_enmr_is_extended(tmp_path):

# total 3 fragment files

vfs = tiledb.VFS()
# subtract 1 for the __schema/__enumerations directory;
# only looking at fragment files
assert len(vfs.ls(os.path.join(uri, "__schema"))) - 1 == 3
assert len(list((Path(uri) / "__schema").iterdir())) - 1 == 3


def test_fix_update_dataframe_with_var_strings(tmp_path):
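The pattern the updated tests rely on, as a standalone sketch: it assumes a DataFrame at uri created with a Zstd(level=3) filter on dimension foo and the default Zstd filter on attribute bar, as in test_dataframe above:

import json

import tiledbsoma as soma

with soma.DataFrame.open(uri) as A:
    cfg = A.config_options_from_schema()
    # cfg.dims and cfg.attrs are JSON strings; the per-entry key is now
    # "filters" (formerly "filter_list").
    assert json.loads(cfg.dims)["foo"]["filters"] == [
        {"COMPRESSION_LEVEL": 3, "name": "ZSTD"}
    ]
    assert json.loads(cfg.attrs)["bar"]["filters"] == [
        {"COMPRESSION_LEVEL": -1, "name": "ZSTD"}
    ]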
4 changes: 2 additions & 2 deletions apis/python/tests/test_dense_nd_array.py
@@ -1,12 +1,12 @@
import contextlib
import datetime
import json
import pathlib
from typing import Tuple

import numpy as np
import pyarrow as pa
import pytest
import json

import tiledbsoma as soma
from tiledbsoma.options import SOMATileDBContext
@@ -494,4 +494,4 @@ def test_read_to_unwritten_array(tmp_path, shape):
soma.DenseNDArray.create(uri, type=pa.uint8(), shape=shape)

with soma.DenseNDArray.open(uri, "r") as A:
assert np.array_equal(np.ones(shape)*255, A.read().to_numpy())
assert np.array_equal(np.ones(shape) * 255, A.read().to_numpy())
71 changes: 28 additions & 43 deletions apis/python/tests/test_sparse_nd_array.py
@@ -3,6 +3,7 @@
import contextlib
import datetime
import itertools
import json
import operator
import pathlib
import sys
@@ -19,13 +20,6 @@
from tiledbsoma import _factory
from tiledbsoma.options import SOMATileDBContext

try:
import tiledb

hastiledb = True
except ModuleNotFoundError:
hastiledb = False

from . import NDARRAY_ARROW_TYPES_NOT_SUPPORTED, NDARRAY_ARROW_TYPES_SUPPORTED
from ._util import raises_no_typeguard

@@ -331,11 +325,9 @@ def test_sparse_nd_array_read_write_sparse_tensor(

assert t.shape == shape

# Validate TileDB array schema
if hastiledb:
with tiledb.open(tmp_path.as_posix()) as A:
assert A.schema.sparse
assert not A.schema.allows_duplicates
with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates


@pytest.mark.parametrize("shape", [(10,), (23, 4), (5, 3, 1), (8, 4, 2, 30)])
@@ -357,11 +349,9 @@ def test_sparse_nd_array_read_write_table(
assert isinstance(t, pa.Table)
assert tables_are_same_value(data, t)

# Validate TileDB array schema
if hastiledb:
with tiledb.open(tmp_path.as_posix()) as A:
assert A.schema.sparse
assert not A.schema.allows_duplicates
with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates


@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64])
@@ -387,11 +377,9 @@ def test_sparse_nd_array_read_as_pandas(
data.to_pandas().sort_values(by=dim_names, ignore_index=True)
)

# Validate TileDB array schema
if hastiledb:
with tiledb.open(tmp_path.as_posix()) as A:
assert A.schema.sparse
assert not A.schema.allows_duplicates
with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates


@pytest.mark.parametrize("shape_is_nones", [True, False])
@@ -1096,14 +1084,14 @@ def test_tile_extents(tmp_path):
},
).close()

if hastiledb:
with tiledb.open(uri) as A:
if soma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
assert A.schema.domain.dim(0).tile == 2048
assert A.schema.domain.dim(1).tile == 2048
else:
assert A.schema.domain.dim(0).tile == 100
assert A.schema.domain.dim(1).tile == 2048
with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
dim_info = json.loads(A.config_options_from_schema().dims)
if soma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
assert int(dim_info["soma_dim_0"]["tile"]) == 2048
assert int(dim_info["soma_dim_1"]["tile"]) == 2048
else:
assert int(dim_info["soma_dim_0"]["tile"]) == 100
assert int(dim_info["soma_dim_1"]["tile"]) == 2048


@pytest.mark.parametrize(
@@ -1112,33 +1100,26 @@
(
{"allows_duplicates": True},
{
"validity_filters": (
tiledb.FilterList([tiledb.RleFilter()]) if hastiledb else None
),
"validity_filters": [{"COMPRESSION_LEVEL": -1, "name": "RLE"}],
"allows_duplicates": True,
},
),
(
{"allows_duplicates": False},
{
"validity_filters": (
tiledb.FilterList([tiledb.RleFilter()]) if hastiledb else None
),
"validity_filters": [{"COMPRESSION_LEVEL": -1, "name": "RLE"}],
"allows_duplicates": False,
},
),
(
{"validity_filters": ["NoOpFilter"], "allows_duplicates": False},
{
"validity_filters": (
tiledb.FilterList([tiledb.NoOpFilter()]) if hastiledb else None
),
"validity_filters": [{"name": "NOOP"}],
"allows_duplicates": False,
},
),
),
)
@pytest.mark.skipif(not hastiledb, reason="tiledb-py not installed")
def test_create_platform_config_overrides(
tmp_path, create_options, expected_schema_fields
):
@@ -1149,9 +1130,13 @@ def test_create_platform_config_overrides(
shape=(100, 100),
platform_config={"tiledb": {"create": {**create_options}}},
).close()
with tiledb.open(uri) as D:
for k, v in expected_schema_fields.items():
assert getattr(D.schema, k) == v

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
cfg = A.config_options_from_schema()
assert expected_schema_fields["validity_filters"] == json.loads(
cfg.validity_filters
)
assert expected_schema_fields["allows_duplicates"] == cfg.allows_duplicates


def test_timestamped_ops(tmp_path):
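Tile extents travel through the same JSON payload; a sketch mirroring test_tile_extents above (uri is assumed to hold a 2-D soma.SparseNDArray; extents are serialized as strings, hence the int() casts):

import json

import tiledbsoma as soma

with soma.SparseNDArray.open(uri) as A:
    dim_info = json.loads(A.config_options_from_schema().dims)
    # Each dimension entry carries "tile" and the renamed "filters" key.
    tiles = {name: int(info["tile"]) for name, info in dim_info.items()}
    filters = {name: info["filters"] for name, info in dim_info.items()}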
4 changes: 2 additions & 2 deletions libtiledbsoma/src/utils/arrow_adapter.cc
@@ -212,7 +212,7 @@ json ArrowAdapter::_get_attrs_filter_list_json(
json attrs_filter_list_as_json;
for (const auto& attr : tiledb_schema.attributes()) {
json attr_info = {
{"filter_list", _get_filter_list_json(attr.second.filter_list())}};
{"filters", _get_filter_list_json(attr.second.filter_list())}};
attrs_filter_list_as_json.emplace(attr.first, attr_info);
}
return attrs_filter_list_as_json;
@@ -223,7 +223,7 @@ json ArrowAdapter::_get_dims_list_json(const ArraySchema& tiledb_schema) {
for (const auto& dim : tiledb_schema.domain().dimensions()) {
json dim_info = {
{"tile", dim.tile_extent_to_str()},
{"filter_list", _get_filter_list_json(dim.filter_list())}};
{"filters", _get_filter_list_json(dim.filter_list())}};
dims_as_json.emplace(dim.name(), dim_info);
}
return dims_as_json;
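Taken together, the two helpers now emit JSON of roughly this shape (reconstructed from the code above; names and values are illustrative):

# _get_attrs_filter_list_json: one entry per attribute.
attrs_json = {
    "bar": {"filters": [{"COMPRESSION_LEVEL": -1, "name": "ZSTD"}]},
}
# _get_dims_list_json: one entry per dimension; tile_extent_to_str()
# keeps extents as strings.
dims_json = {
    "foo": {
        "tile": "2048",
        "filters": [{"COMPRESSION_LEVEL": 3, "name": "ZSTD"}],
    },
}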
7 changes: 3 additions & 4 deletions libtiledbsoma/test/unit_soma_dataframe.cc
@@ -382,12 +382,11 @@ TEST_CASE_METHOD(
Filter::to_str(filter.second));
if (filter.second != TILEDB_FILTER_WEBP) {
REQUIRE(
json::parse(config_options.attrs)["a0"]["filter_list"][0]
.at("name") == Filter::to_str(filter.second));
json::parse(config_options.attrs)["a0"]["filters"][0].at(
"name") == Filter::to_str(filter.second));
}
REQUIRE(
json::parse(
config_options.dims)["soma_joinid"]["filter_list"][0]
json::parse(config_options.dims)["soma_joinid"]["filters"][0]
.at("name") == Filter::to_str(TILEDB_FILTER_ZSTD));

sdf->close();
