From 9fd1401b28c83f56e6e03524e92f40c7ea8c3333 Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Mon, 28 Oct 2024 10:51:43 -0400 Subject: [PATCH 01/10] Update somacore version for Python API and pre-commit --- .pre-commit-config.yaml | 2 +- apis/python/setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 607e5747b3..beb7305135 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # Pandas 2.x types (e.g. `pd.Series[Any]`). See `_types.py` or https://github.com/single-cell-data/TileDB-SOMA/issues/2839 # for more info. - "pandas-stubs>=2" - - "somacore==1.0.22" + - "somacore @ git+https://github.com/single-cell-data/SOMA.git@08b795e" # DO NOT MERGE - types-setuptools args: ["--config-file=apis/python/pyproject.toml", "apis/python/src", "apis/python/devtools"] pass_filenames: false diff --git a/apis/python/setup.py b/apis/python/setup.py index 81e2d10273..2a6057b2b7 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -335,8 +335,8 @@ def run(self): "pyarrow", "scanpy>=1.9.2", "scipy", - # Note: the somacore version is also pinned in .pre-commit-config.yaml - "somacore==1.0.22", + # Note: the somacore version is in .pre-commit-config.yaml too + "somacore @ git+https://github.com/single-cell-data/SOMA.git@08b795e", # DO NOT MERGE "typing-extensions", # Note "-" even though `import typing_extensions` ], extras_require={ From 353ee9610b1a79ce645f548172b02774a0fe65d9 Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Tue, 29 Oct 2024 15:57:49 -0400 Subject: [PATCH 02/10] (WIP) Update the multiscale image --- apis/python/src/tiledbsoma/_constants.py | 6 + .../src/tiledbsoma/_multiscale_image.py | 539 +++++++++--------- apis/python/src/tiledbsoma/_scene.py | 43 +- apis/python/src/tiledbsoma/_spatial_util.py | 12 +- 4 files changed, 292 insertions(+), 308 deletions(-) diff --git a/apis/python/src/tiledbsoma/_constants.py b/apis/python/src/tiledbsoma/_constants.py index 3e139ea728..d0d9dd1f8c 100644 --- a/apis/python/src/tiledbsoma/_constants.py +++ b/apis/python/src/tiledbsoma/_constants.py @@ -6,14 +6,20 @@ """Global package constants """ + SOMA_JOINID = "soma_joinid" SOMA_GEOMETRY = "soma_geometry" SOMA_COORDINATE_SPACE_METADATA_KEY = "soma_coordinate_space" SOMA_MULTISCALE_IMAGE_SCHEMA = "soma_multiscale_image_schema" SOMA_OBJECT_TYPE_METADATA_KEY = "soma_object_type" + SOMA_ENCODING_VERSION_METADATA_KEY = "soma_encoding_version" SOMA_ENCODING_VERSION = "1.1.0" +SOMA_SPATIAL_VERSION_METADATA_KEY = "soma_spatial_encoding_version" +sOMA_SPATIAL_ENCODING_VERSION = "0.1.0" + + SPATIAL_DISCLAIMER = ( "Support for spatial types is experimental. Changes to both the API and data " "storage may not be backwards compatible." diff --git a/apis/python/src/tiledbsoma/_multiscale_image.py b/apis/python/src/tiledbsoma/_multiscale_image.py index f60418a638..bce879e35e 100644 --- a/apis/python/src/tiledbsoma/_multiscale_image.py +++ b/apis/python/src/tiledbsoma/_multiscale_image.py @@ -28,6 +28,7 @@ from ._constants import ( SOMA_COORDINATE_SPACE_METADATA_KEY, SOMA_MULTISCALE_IMAGE_SCHEMA, + SOMA_SPATIAL_VERSION_METADATA_KEY, SPATIAL_DISCLAIMER, ) from ._dense_nd_array import DenseNDArray @@ -45,58 +46,39 @@ @attrs.define(frozen=True) -class ImageProperties: - """Properties for a single resolution level in a multiscale image. - - Lifecycle: - Experimental. 
- """ +class _LevelProperties: + """Properties for a single resolution level in a multiscale image.""" name: str - image_type: str - shape: Tuple[int, ...] = attrs.field(converter=tuple) - width: int = attrs.field(init=False) - height: int = attrs.field(init=False) - depth: Optional[int] = attrs.field(init=False) - nchannels: Optional[int] = attrs.field(init=False) - - def __attrs_post_init__(self): # type: ignore[no-untyped-def] - if len(self.image_type) != len(set(self.image_type)): - raise ValueError( - f"Invalid image type '{self.image_type}'. Image type cannot contain " - f"repeated values." - ) - if len(self.image_type) != len(self.shape): - raise ValueError( - f"{len(self.image_type)} axis names must be provided for a multiscale " - f"image with image type {self.image_type}." - ) + shape: Tuple[int, ...] - nchannels: Optional[int] = None - width: Optional[int] = None - height: Optional[int] = None - depth: Optional[int] = None - for val, size in zip(self.image_type, self.shape): - if val == "X": - width = size - elif val == "Y": - height = size - elif val == "Z": - depth = size - elif val == "C": - nchannels = size - else: - raise SOMAError(f"Invalid image type '{self.image_type}'") - if width is None or height is None: - raise ValueError( - f"Invalid image type '{self.image_type}'. Image type must include " - f"'X' and 'Y'." - ) - object.__setattr__(self, "nchannels", nchannels) - object.__setattr__(self, "width", width) - object.__setattr__(self, "height", height) - object.__setattr__(self, "depth", depth) +@attrs.define(frozen=True) +class _MultiscaleImageMetadata: + """Helper class for reading/writing multiscale image metadata.""" + + data_axis_permutation: Tuple[int, ...] + has_channel_axis: bool + shape: Tuple[int, ...] + datatype: pa.DataType + + def to_json(self) -> str: + type_str = pyarrow_to_carrow_type(self.datatype) + return json.dumps( + { + "data_axis_permutation": self.data_axis_permutation, + "shape": self.shape, + "has_channel_axis": self.has_channel_axis, + "datatype": type_str, + } + ) + + @classmethod + def from_json(cls, data: str) -> Self: + kwargs = json.loads(data) + type_str = kwargs.pop("datatype") + type = carrow_type_to_pyarrow(type_str) + return cls(datatype=type, **kwargs) class MultiscaleImage( # type: ignore[misc] # __eq__ false positive @@ -114,7 +96,13 @@ class MultiscaleImage( # type: ignore[misc] # __eq__ false positive Experimental. """ - __slots__ = ("_schema", "_coord_space", "_levels") + __slots__ = ( + "_coord_space", + "_data_axis_permutation", + "_datatype", + "_has_channel_axis", + "_levels", + ) _wrapper_type = _tdb_handles.MultiscaleImageWrapper _level_prefix: Final = "soma_level_" @@ -127,9 +115,15 @@ def create( uri: str, *, type: pa.DataType, - reference_level_shape: Sequence[int], - axis_names: Sequence[str] = ("c", "y", "x"), - axis_types: Sequence[str] = ("channel", "height", "width"), + level_shape: Sequence[int], + level_key: str = "level0", + level_uri: Optional[str] = None, + coordinate_space: Union[Sequence[str], CoordinateSpace] = ( + "x", + "y", + ), + data_axis_order: Optional[Sequence[str]] = None, + has_channel_axis: bool = True, platform_config: Optional[options.PlatformConfig] = None, context: Optional[SOMATileDBContext] = None, tiledb_timestamp: Optional[OpenTimestamp] = None, @@ -138,16 +132,29 @@ def create( Args: uri: The URI where the collection will be created. - reference_level_shape: The shape of the reference level for the multiscale - image. 
In most cases, this corresponds to the size of the image - at ``level=0``. - axis_names: The names of the axes of the image. - axis_types: The types of the axes of the image. Must be the same length as - ``axis_names``. Valid types are: ``channel``, ``height``, ``width``, - and ``depth``. + type: The Arrow type to store the image data in the array. + If the type is unsupported, an error will be raised. + level_shape: The shape of the multiscale image for ``level=0``. Must + include the channel dimension if there is one. + level_key: The name for the ``level=0`` image. Defaults to ``level0``. + level_uri: The URI for the ``level=0`` image. If the URI is an existing + SOMADenseNDArray it must match have the shape provided by + ``level_shape`` and type specified in ``type. If set to ``None``, the + ``level_key`` will be used to construct a default child URI. For more + on URIs see :meth:`collection.Collection.add_new_collction`. + coordinate_space: Either the coordinate space or the axis names for the + coordinate space the ``level=0`` image is defined on. This does not + include the channel dimension, only spatial dimensions. + data_axis_order: The order of the axes as stored on disk. Use + ``soma_channel`` to specify the location of a channel axis. If no + axis is provided, this defaults to the channel axis followed by the + coordinate space axes in reverse order (e.g. + ``("soma_channel", "y", "x")`` if ``coordinate_space=("x", "y")``). + Returns: - The newly created ``MultiscaleImage``, opened for writing. + The newly created ``MultiscaleImage``, with the initial image array open + for writing. Lifecycle: Experimental. @@ -156,34 +163,38 @@ def create( warnings.warn(SPATIAL_DISCLAIMER) context = _validate_soma_tiledb_context(context) - if len(set(axis_types)) != len(axis_types): - raise ValueError( - "Invalid axis types {axis_types} - cannot have repeated values." - ) - if len(axis_names) != len(axis_types): - raise ValueError("Mismatched lengths for axis names and types.") - axis_type_map = {"channel": "C", "height": "Y", "width": "X", "depth": "Z"} - image_type = [] - for val in axis_types: - try: - image_type.append(axis_type_map[val]) - except KeyError as ke: - raise ValueError(f"Invalid axis type name '{val}'.") from ke - schema = MultiscaleImageSchema( - ImageProperties( - name="reference_level", - image_type="".join(image_type), - shape=tuple(reference_level_shape), # type: ignore - ), - axis_names=tuple(axis_names), + + # Create the coordinate space. 
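+        # If a plain sequence of axis names is given, it is converted to a
+        # CoordinateSpace here; the expected number of data dimensions is the
+        # number of spatial axes, plus one when ``has_channel_axis=True``.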
+ if not isinstance(coordinate_space, CoordinateSpace): + coordinate_space = CoordinateSpace.from_axis_names(coordinate_space) + + ndim = len(coordinate_space) + if has_channel_axis: + ndim += 1 + + if len(level_shape) != ndim: + raise ValueError() # TODO: Add error + if data_axis_order is None: + axis_permutation = tuple(range(ndim - 1, -1, -1)) + else: + axis_indices = { + name: index for index, name in enumerate(coordinate_space.axis_names) + } + if has_channel_axis: + axis_indices["soma_channel"] = len(coordinate_space) + if set(data_axis_order) != set(axis_indices.keys()): + raise ValueError() # TODO: Add error + axis_permutation = tuple(axis_indices[name] for name in data_axis_order) + + image_meta = _MultiscaleImageMetadata( + data_axis_permutation=axis_permutation, + has_channel_axis=has_channel_axis, + shape=tuple(level_shape), datatype=type, ) - coord_space = CoordinateSpace.from_axis_names( - schema.get_coordinate_space_axis_names() - ) - schema_str = schema.to_json() - coord_space_str = coordinate_space_to_json(coord_space) + _image_meta_str = image_meta.to_json() + coord_space_str = coordinate_space_to_json(coordinate_space) try: timestamp_ms = context._open_timestamp_ms(tiledb_timestamp) clib.SOMAGroup.create( @@ -195,7 +206,7 @@ def create( handle = _tdb_handles.MultiscaleImageWrapper.open( uri, "w", context, tiledb_timestamp ) - handle.metadata[SOMA_MULTISCALE_IMAGE_SCHEMA] = schema_str + handle.metadata[SOMA_MULTISCALE_IMAGE_SCHEMA] = _image_meta_str handle.metadata[SOMA_COORDINATE_SPACE_METADATA_KEY] = coord_space_str return cls( handle, @@ -212,19 +223,20 @@ def __init__( # Do generic SOMA collection initialization. super().__init__(handle, **kwargs) - # Get schema for the multiscale image. try: - schema_json = self.metadata[SOMA_MULTISCALE_IMAGE_SCHEMA] + spatial_encoding_version = self.metadata[SOMA_SPATIAL_VERSION_METADATA_KEY] + if isinstance(spatial_encoding_version, bytes): + spatial_encoding_version = str(spatial_encoding_version, "utf-8") + if spatial_encoding_version != "0.1.0": + raise ValueError( + f"Unsupported MultiscaleImage with spatial encoding version " + f"{spatial_encoding_version}" + ) except KeyError as ke: - raise SOMAError("Missing multiscale image schema metadata") from ke - if isinstance(schema_json, bytes): - schema_json = str(schema_json, "utf-8") - if not isinstance(schema_json, str): raise SOMAError( - f"Stored '{SOMA_MULTISCALE_IMAGE_SCHEMA}' metadata is unexpected " - f"type {type(schema_json)!r}." - ) - self._schema = MultiscaleImageSchema.from_json(schema_json) + "Missing spatial encoding version. May be deprecated experimental " + "MultiscaleImage." + ) from ke # Get the coordinate space. try: @@ -233,23 +245,34 @@ def __init__( raise SOMAError("Missing coordinate space metadata") from ke self._coord_space = coordinate_space_from_json(coord_space) - # Check schema and coordinate space have the same axis order - schema_axes = self._schema.get_coordinate_space_axis_names() - if schema_axes != self._coord_space.axis_names: + # Get the multiscale image specific metadata. + try: + metadata_json = self.metadata[SOMA_MULTISCALE_IMAGE_SCHEMA] + except KeyError as ke: + raise SOMAError("Missing multiscale image schema metadata") from ke + if isinstance(metadata_json, bytes): + metadata_json = str(metadata_json, "utf-8") + if not isinstance(metadata_json, str): raise SOMAError( - f"Inconsistent axis names stored in metadata. 
Multiscale schema metadata" - f" has coordinate axes '{schema_axes}', but the coordinate space " - f"metadata has coordinate axes '{self._coord_space.axis_names}'" + f"Stored '{SOMA_MULTISCALE_IMAGE_SCHEMA}' metadata is unexpected " + f"type {type(metadata_json)!r}." ) + image_meta = _MultiscaleImageMetadata.from_json(metadata_json) + self._data_axis_permutation = image_meta.data_axis_permutation + self._has_channel_axis = image_meta.has_channel_axis + self._shape = image_meta.shape + self._datatype = image_meta.datatype # Get the image levels. # TODO: Optimize and push down to C++ level self._levels = [ - ImageProperties(name=key[len(self._level_prefix) :], **json.loads(val)) + _LevelProperties(name=key[len(self._level_prefix) :], shape=val) for key, val in self.metadata.items() if key.startswith(self._level_prefix) ] - self._levels.sort(key=lambda level: (-level.width, -level.height, level.name)) + self._levels.sort( + key=lambda level: tuple(-val for val in level.shape) + (level.name,) + ) @_funcs.forwards_kwargs_to( DenseNDArray.create, exclude=("context", "shape", "tiledb_timestamp") @@ -279,50 +302,41 @@ def add_new_level( raise KeyError(f"{key!r} already exists in {type(self)} scales") # Check if the shape is valid. - ref_props = self._schema.reference_level_properties shape = tuple(shape) - if len(shape) != len(ref_props.shape): - raise ValueError( - f"New level must have {len(ref_props.shape)} dimensions, but shape " - f"{shape} has {len(shape)} dimensions." - ) - - # Check, create, and store as metadata the new level image properties. - props = ImageProperties( - image_type=ref_props.image_type, - name=key, - shape=shape, # type: ignore - ) - if ref_props.nchannels is not None and ref_props.nchannels != props.nchannels: + ndim = len(self._data_axis_permutation) + if len(shape) != ndim: raise ValueError( - f"New level must have {ref_props.nchannels}, but provided shape has " - f"{props.nchannels} channels." + f"New level must have {ndim} dimensions, but shape {shape} has " + f"{len(shape)} dimensions." ) - props_str = json.dumps( - { - "image_type": ref_props.image_type, - "shape": shape, - } - ) - self.metadata[meta_key] = props_str + if self._has_channel_axis: + channel_index = self._data_axis_permutation.index(len(self._coord_space)) + expected_nchannel = self._levels[0].shape[channel_index] + actual_nchannel = shape[channel_index] + if actual_nchannel != expected_nchannel: + raise ValueError( + f"New level must have {expected_nchannel}, but provided shape has " + f"{actual_nchannel} channels." + ) # Add the level properties to level list. # Note: The names are guaranteed to be different from the earlier checks. - for index, val in enumerate(self._levels): + props = _LevelProperties(name=key, shape=shape) + for index, other in enumerate(self._levels): # Note: Name is unique, so guaranteed to be strict ordering. - if (-props.width, -props.height, props.name) < ( - -val.width, - -val.height, - val.name, - ): + if tuple(-val for val in props.shape) + (props.name,) < tuple( + -val for val in other.shape + ) + (other.name,): self._levels.insert(index, props) break else: self._levels.append(props) - # Create and return new level array. + shape_str = json.dumps(shape) + self.metadata[meta_key] = shape_str + # Create and return new level array. 
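        # The level data is stored as a child DenseNDArray created with the
        # image datatype recorded at creation time and the shape checked above.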
return self._add_new_element( key, DenseNDArray, @@ -330,13 +344,46 @@ def add_new_level( create_uri, context=self.context, tiledb_timestamp=self.tiledb_timestamp_ms, - shape=props.shape, - type=self._schema.datatype, + shape=shape, + type=self._datatype, **kwargs, ), uri, ) + def set( + self, + key: str, + value: DenseNDArray, + *, + use_relative_uri: Optional[bool] = None, + ) -> Self: + """Sets a new level in the multi-scale image to be an existing SOMA + :class:`DenseNDArray`. + + Args: + key: The string key to set. + value: The SOMA object to insert into the collection. + use_relative_uri: Determines whether to store the collection + entry with a relative URI (provided the storage engine + supports it). + If ``None`` (the default), will automatically determine whether + to use an absolute or relative URI based on their relative + location. + If ``True``, will always use a relative URI. If the new child + does not share a relative URI base, or use of relative URIs + is not possible at all, the collection should raise an error. + If ``False``, will always use an absolute URI. + + Returns: ``self``, to enable method chaining. + + Lifecycle: experimental + """ + raise NotImplementedError( + "Support for setting external DenseNDArray objects to a MultiscaleImage " + "is not yet implemented." + ) + # Data operations def read_spatial_region( @@ -348,6 +395,7 @@ def read_spatial_region( region_transform: Optional[CoordinateTransform] = None, region_coord_space: Optional[CoordinateSpace] = None, result_order: options.ResultOrderStr = options.ResultOrder.ROW_MAJOR, + data_axis_order: Optional[Sequence[str]] = None, platform_config: Optional[options.PlatformConfig] = None, ) -> somacore.SpatialRead[pa.Tensor]: """Reads a user-defined spatial region from a specific level of the ``MultiscaleImage``. @@ -370,8 +418,13 @@ def read_spatial_region( region_coord_space: An optional coordinate space for the region being read. The axis names must match the input axis names of the transform. Defaults to ``None``, coordinate space will be inferred from transform. - result_order: the order to return results, specified as a - :class:`~options.ResultOrder` or its string value. + data_axis_order: The order to return the data axes in. Use ``soma_channel`` + to specify the location of the channel coordinate. + result_order: The order data to return results, specified as a + :class:`~options.ResultOrder` or its string value. This is the result + order the data is read from disk. It may be permuted if + ``data_axis_order`` is not the default order. + Returns: The data bounding the requested region as a :class:`SpatialRead` with @@ -380,17 +433,20 @@ def read_spatial_region( Lifecycle: Experimental. """ + if data_axis_order is not None: + raise NotImplementedError( + "Support for altering the data axis order on read is not yet " + "implemented." + ) + # Get reference level. Check image is 2D. - if self._schema.reference_level_properties.depth is not None: + if len(self._coord_space) > 2: raise NotImplementedError( "Support for reading the levels of 3D images it not yet implemented." ) - # Check input query region type is supported. - if ( - channel_coords is not None - and self._schema.reference_level_properties.nchannels is None - ): + # Check channel coords input is valid. + if channel_coords is not None and not self._has_channel_axis: raise ValueError( "Invalide channel coordinate provided. This image has no channel " "dimension." 
@@ -443,7 +499,7 @@ def read_spatial_region( region, region_transform, channel_coords, - self._schema.reference_level_properties.image_type, + self._data_axis_permutation, ) # Get the array. @@ -466,15 +522,24 @@ def read_spatial_region( ) # Metadata operations - - @property - def axis_names(self) -> Tuple[str, ...]: - """The name of the image axes. + def _level_properties(self, level: Union[int, str]) -> _LevelProperties: + """The properties of an image at the specified level. Lifecycle: Experimental. """ - return tuple(self._schema.axis_names) + # by name + # TODO could dyanmically create a dictionary whenever a name-based + # lookup is requested + if isinstance(level, str): + for val in self._levels: + if val.name == level: + return val + else: + raise KeyError("No level with name '{level}'") + + # by index + return self._levels[level] @property def coordinate_space(self) -> CoordinateSpace: @@ -504,6 +569,22 @@ def coordinate_space(self, value: CoordinateSpace) -> None: ) self._coord_space = value + @property + def data_axis_order(self) -> Tuple[str, ...]: + """The order of the axes for the resolution levels. + + Lifecycle: + Experimental. + """ + return tuple( + ( + "soma_channel" + if index == len(self._coord_space) + else self._coord_space.axis_names[index] + ) + for index in self._data_axis_permutation + ) + def get_transform_from_level(self, level: Union[int, str]) -> ScaleTransform: """Returns the transformation from user requested level to the image reference level. @@ -511,26 +592,17 @@ def get_transform_from_level(self, level: Union[int, str]) -> ScaleTransform: Lifecycle: Experimental. """ - level_props = self.level_properties(level) - ref_level_props = self._schema.reference_level_properties - if ref_level_props.depth is None: - return ScaleTransform( - input_axes=self._coord_space.axis_names, - output_axes=self._coord_space.axis_names, - scale_factors=[ - ref_level_props.width / level_props.width, - ref_level_props.height / level_props.height, - ], - ) - assert level_props.depth is not None + level_shape = self._level_properties(level).shape + base_shape = self._levels[0].shape + scale_factors = [ + base_shape[index] / level_shape[index] + for index in self._data_axis_permutation + if index != len(self._coord_space) + ] return ScaleTransform( input_axes=self._coord_space.axis_names, output_axes=self._coord_space.axis_names, - scale_factors=[ - ref_level_props.width / level_props.width, - ref_level_props.height / level_props.height, - ref_level_props.depth / level_props.depth, - ], + scale_factors=scale_factors, ) def get_transform_to_level(self, level: Union[int, str]) -> ScaleTransform: @@ -540,36 +612,27 @@ def get_transform_to_level(self, level: Union[int, str]) -> ScaleTransform: Lifecycle: Experimental. 
""" - level_props = self.level_properties(level) - ref_level_props = self._schema.reference_level_properties - if ref_level_props.depth is None: - return ScaleTransform( - input_axes=self._coord_space.axis_names, - output_axes=self._coord_space.axis_names, - scale_factors=[ - level_props.width / ref_level_props.width, - level_props.height / ref_level_props.height, - ], - ) - assert level_props.depth is not None + level_shape = self._level_properties(level).shape + base_shape = self._levels[0].shape + scale_factors = [ + level_shape[index] / base_shape[index] + for index in self._data_axis_permutation + if index != len(self._coord_space) + ] return ScaleTransform( input_axes=self._coord_space.axis_names, output_axes=self._coord_space.axis_names, - scale_factors=[ - level_props.width / ref_level_props.width, - level_props.height / ref_level_props.height, - level_props.depth / ref_level_props.depth, - ], + scale_factors=scale_factors, ) @property - def image_type(self) -> str: - """The order of the axes as stored in the data model. + def has_channel_axis(self) -> bool: + """Returns if the images have an explicit channel axis. Lifecycle: Experimental. """ - return self._schema.reference_level_properties.image_type + return self._has_channel_axis @property def level_count(self) -> int: @@ -580,105 +643,25 @@ def level_count(self) -> int: """ return len(self._levels) - def level_properties(self, level: Union[int, str]) -> ImageProperties: - """The properties of an image at the specified level. + def level_shape(self, level: Union[int, str]) -> Tuple[int, ...]: + """The shape of the image at the specified level. - Lifecycle: - Experimental. + Lifecycle: experimental """ - # by name - # TODO could dyanmically create a dictionary whenever a name-based - # lookup is requested + if isinstance(level, str): for val in self._levels: if val.name == level: - return val + return val.shape else: raise KeyError("No level with name '{level}'") # by index - return self._levels[level] - - @property - def reference_level(self) -> Optional[int]: - """The index of image level that is used as a reference level. - - This will return ``None`` if no current image level matches the size of the - reference level. - - Lifecycle: - Experimental. - """ - raise NotImplementedError() + return self._levels[level].shape @property - def reference_level_properties(self) -> "ImageProperties": - """The image properties of the reference level. - - Lifecycle: - Experimental. - """ - return self._schema.reference_level_properties - - -# TODO: Push down to C++ layer -@attrs.define -class MultiscaleImageSchema: - - reference_level_properties: ImageProperties - axis_names: Tuple[str, ...] - datatype: pa.DataType - - def __attrs_post_init__(self): # type: ignore[no-untyped-def] - ndim = len(self.reference_level_properties.shape) - if len(self.axis_names) != ndim: - raise ValueError( - f"Invalid axis names '{self.axis_names}'. {ndim} axis names must be " - f"provided for a multiscale image with image type " - f"{self.reference_level_properties.image_type}. " - ) - - def create_coordinate_space(self) -> CoordinateSpace: - return CoordinateSpace.from_axis_names(self.get_coordinate_space_axis_names()) - - def get_coordinate_space_axis_names(self) -> Tuple[str, ...]: - # TODO: Setting axes and the coordinate space is going to be updated - # in a future PR. 
- x_name: Optional[str] = None - y_name: Optional[str] = None - z_name: Optional[str] = None - for axis_name, axis_type in zip( - self.axis_names, self.reference_level_properties.image_type - ): - if axis_type == "X": - x_name = axis_name - elif axis_type == "Y": - y_name = axis_name - elif axis_type == "Z": - z_name = axis_name - assert x_name is not None # For mypy (already validated) - assert y_name is not None # For mypy (already validated) - if z_name is None: - return (x_name, y_name) - else: - return (x_name, y_name, z_name) - - def to_json(self) -> str: - type_str = pyarrow_to_carrow_type(self.datatype) - return json.dumps( - { - "name": self.reference_level_properties.name, - "image_type": self.reference_level_properties.image_type, - "shape": self.reference_level_properties.shape, - "axis_names": self.axis_names, - "datatype": type_str, - } - ) - - @classmethod - def from_json(cls, data: str) -> Self: - kwargs = json.loads(data) - axis_names = kwargs.pop("axis_names") - type_str = kwargs.pop("datatype") - type = carrow_type_to_pyarrow(type_str) - return cls(ImageProperties(**kwargs), axis_names, type) + def nchannel(self) -> int: + if self._has_channel_axis: + index = self._data_axis_permutation.index(len(self._coord_space)) + return self._levels[0].shape[index] + return 1 diff --git a/apis/python/src/tiledbsoma/_scene.py b/apis/python/src/tiledbsoma/_scene.py index fe4c9ec0d6..ba2015b935 100644 --- a/apis/python/src/tiledbsoma/_scene.py +++ b/apis/python/src/tiledbsoma/_scene.py @@ -221,8 +221,7 @@ def add_new_multiscale_image( *, transform: Optional[CoordinateTransform], uri: Optional[str] = None, - axis_names: Sequence[str] = ("c", "y", "x"), - axis_types: Sequence[str] = ("channel", "height", "width"), + coordinate_space: Union[Sequence[str], CoordinateSpace] = ("x", "y"), **kwargs: Any, ) -> MultiscaleImage: """Adds a ``MultiscaleImage`` to the scene and sets a coordinate transform @@ -260,28 +259,25 @@ def add_new_multiscale_image( f"space." ) - # Get and check the multiscale image coordinata space axis names. - # Note: The input paremeters to the MultiscaleImage create method are being - # revisited. The following code will be improved after the create - # parameters stabilize. - ordered_axis_names: List[Optional[str]] = [None, None, None] - for ax_name, ax_type in zip(axis_names, axis_types): - # Validation unneed if the type falls through. Invalid types will be - # caught in the MultiscaleImage.create method. - if ax_type == "width": - ordered_axis_names[0] = ax_name - elif ax_type == "height": - ordered_axis_names[1] = ax_name - elif ax_type == "depth": - ordered_axis_names[2] = ax_name - ordered_axis_names = [ - axis_name for axis_name in ordered_axis_names if axis_name is not None - ] - if transform.output_axes != tuple(ordered_axis_names): + if transform.input_axes != self.coordinate_space.axis_names: + raise ValueError( + f"The name of the transform input axes, {transform.input_axes}, " + f"do not match the name of the axes, " + f"{self.coordinate_space.axis_names}, in the scene coordinate " + f"space." + ) + + # Get multisclae image coordinate space and check. + elem_axis_names = ( + coordinate_space.axis_names + if isinstance(coordinate_space, CoordinateSpace) + else tuple(coordinate_space) + ) + if transform.output_axes != elem_axis_names: raise ValueError( f"The name of the transform output axes, {transform.output_axes}, " - f"do not match the name of the axes, {tuple(ordered_axis_names)}, " - f"of the coordinate space the multiscale image is defined on." 
+ f"do not match the name of the axes, {elem_axis_names}, of the " + f"coordinate space the multiscale image is defined on." ) # Open the subcollection and add the new multiscale image. @@ -293,8 +289,7 @@ def add_new_multiscale_image( create_uri, context=self.context, tiledb_timestamp=self.tiledb_timestamp_ms, - axis_names=axis_names, - axis_types=axis_types, + coordinate_space=coordinate_space, **kwargs, ), uri, diff --git a/apis/python/src/tiledbsoma/_spatial_util.py b/apis/python/src/tiledbsoma/_spatial_util.py index 2540ffaba6..163050b078 100644 --- a/apis/python/src/tiledbsoma/_spatial_util.py +++ b/apis/python/src/tiledbsoma/_spatial_util.py @@ -116,7 +116,7 @@ def process_image_region( region: Optional[options.SpatialRegion], transform: somacore.CoordinateTransform, channel_coords: options.DenseCoord, - image_type: str, + data_order: Tuple[int, ...], ) -> Tuple[ options.DenseNDCoords, Optional[options.SpatialRegion], somacore.CoordinateTransform ]: @@ -157,14 +157,14 @@ def process_image_region( # Get the dense coordinates for querying the array storing the image. coords: options.DenseNDCoords = [] - for axis in image_type: - if axis == "C": + for axis in data_order: + if axis == len(inv_transform.input_axes): coords.append(channel_coords) # type: ignore[attr-defined] - if axis == "X": + if axis == 0: coords.append(x_coords) # type: ignore[attr-defined] - if axis == "Y": + if axis == 1: coords.append(y_coords) # type: ignore[attr-defined] - if axis == "Z": + if axis == 2: raise NotImplementedError( "Spatial queries are currently only supported for 2D coordinates." ) From 1ec23cd0fc7428347e2af06ff1d05b73e97cb8c4 Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Tue, 29 Oct 2024 17:31:54 -0400 Subject: [PATCH 03/10] Update Visium ingestion MultiscaleImage --- .../src/tiledbsoma/experimental/ingest.py | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/apis/python/src/tiledbsoma/experimental/ingest.py b/apis/python/src/tiledbsoma/experimental/ingest.py index d1cf9e9017..da7f377f55 100644 --- a/apis/python/src/tiledbsoma/experimental/ingest.py +++ b/apis/python/src/tiledbsoma/experimental/ingest.py @@ -44,7 +44,6 @@ DataFrame, DenseNDArray, Experiment, - ImageProperties, MultiscaleImage, PointCloudDataFrame, Scene, @@ -543,14 +542,13 @@ def from_visium( IdentityTransform(("x", "y"), ("x", "y")), ) else: - level_props: ImageProperties = ( - tissue_image.level_properties(0) - ) + base_shape = tissue_image.level_shape(0) + axis_order = tissue_image.data_axis_order + width = base_shape[axis_order.index("x")] + height = base_shape[axis_order.index("y")] updated_scales = ( - level_props.width - / np.round(level_props.width / scale), - level_props.height - / np.round(level_props.height / scale), + width / np.round(width / scale), + height / np.round(height / scale), ) scene.set_transform_to_multiscale_image( image_name, @@ -794,12 +792,10 @@ def _create_visium_tissue_images( im_data_numpy = np.array(im) if image_channel_first: - axis_names = ("c", "y", "x") - axis_types = ("channel", "height", "width") + data_axis_order = ("soma_channel", "y", "x") im_data_numpy = np.moveaxis(im_data_numpy, -1, 0) else: - axis_names = ("y", "x", "c") - axis_types = ("height", "width", "channel") + data_axis_order = ("y", "x", "soma_channel") ref_shape: Tuple[int, ...] = im_data_numpy.shape # Create the multiscale image. 
@@ -808,17 +804,18 @@ def _create_visium_tissue_images( image_pyramid = MultiscaleImage.create( uri, type=pa.uint8(), - reference_level_shape=ref_shape, - axis_names=axis_names, - axis_types=axis_types, + level_shape=ref_shape, + level_key=image_paths[0][0], + data_axis_order=data_axis_order, context=context, + platform_config=platform_config, ) # Add additional metadata. add_metadata(image_pyramid, additional_metadata) # Add and write the first level. - im_array = image_pyramid.add_new_level(image_paths[0][0], shape=ref_shape) + im_array = image_pyramid[image_paths[0][0]] im_data = pa.Tensor.from_numpy(im_data_numpy) im_array.write( (slice(None), slice(None), slice(None)), From 4f43c1ebca46039946ebe56df31f17212d9fd725 Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Tue, 29 Oct 2024 17:33:11 -0400 Subject: [PATCH 04/10] (WIP) More work on the multiscale image --- apis/python/src/tiledbsoma/__init__.py | 3 +- apis/python/src/tiledbsoma/_constants.py | 2 +- .../src/tiledbsoma/_multiscale_image.py | 59 +++++++++++++------ 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/apis/python/src/tiledbsoma/__init__.py b/apis/python/src/tiledbsoma/__init__.py index 1ea21ee5d9..07180e1ea6 100644 --- a/apis/python/src/tiledbsoma/__init__.py +++ b/apis/python/src/tiledbsoma/__init__.py @@ -176,7 +176,7 @@ ) from ._indexer import IntIndexer, tiledbsoma_build_index from ._measurement import Measurement -from ._multiscale_image import ImageProperties, MultiscaleImage +from ._multiscale_image import MultiscaleImage from ._point_cloud_dataframe import PointCloudDataFrame from ._scene import Scene from ._sparse_nd_array import SparseNDArray, SparseNDArrayRead @@ -211,7 +211,6 @@ "get_SOMA_version", "get_storage_engine", "IntIndexer", - "ImageProperties", "Measurement", "MultiscaleImage", "NotCreateableError", diff --git a/apis/python/src/tiledbsoma/_constants.py b/apis/python/src/tiledbsoma/_constants.py index d0d9dd1f8c..ad5f75de45 100644 --- a/apis/python/src/tiledbsoma/_constants.py +++ b/apis/python/src/tiledbsoma/_constants.py @@ -17,7 +17,7 @@ SOMA_ENCODING_VERSION = "1.1.0" SOMA_SPATIAL_VERSION_METADATA_KEY = "soma_spatial_encoding_version" -sOMA_SPATIAL_ENCODING_VERSION = "0.1.0" +SOMA_SPATIAL_ENCODING_VERSION = "0.1.0" SPATIAL_DISCLAIMER = ( diff --git a/apis/python/src/tiledbsoma/_multiscale_image.py b/apis/python/src/tiledbsoma/_multiscale_image.py index bce879e35e..275f86b45c 100644 --- a/apis/python/src/tiledbsoma/_multiscale_image.py +++ b/apis/python/src/tiledbsoma/_multiscale_image.py @@ -28,6 +28,7 @@ from ._constants import ( SOMA_COORDINATE_SPACE_METADATA_KEY, SOMA_MULTISCALE_IMAGE_SCHEMA, + SOMA_SPATIAL_ENCODING_VERSION, SOMA_SPATIAL_VERSION_METADATA_KEY, SPATIAL_DISCLAIMER, ) @@ -50,16 +51,16 @@ class _LevelProperties: """Properties for a single resolution level in a multiscale image.""" name: str - shape: Tuple[int, ...] + shape: Tuple[int, ...] = attrs.field(converter=tuple) @attrs.define(frozen=True) class _MultiscaleImageMetadata: """Helper class for reading/writing multiscale image metadata.""" - data_axis_permutation: Tuple[int, ...] + data_axis_permutation: Tuple[int, ...] = attrs.field(converter=tuple) has_channel_axis: bool - shape: Tuple[int, ...] + shape: Tuple[int, ...] 
= attrs.field(converter=tuple) datatype: pa.DataType def to_json(self) -> str: @@ -187,9 +188,9 @@ def create( axis_permutation = tuple(axis_indices[name] for name in data_axis_order) image_meta = _MultiscaleImageMetadata( - data_axis_permutation=axis_permutation, + data_axis_permutation=axis_permutation, # type: ignore (false positive) has_channel_axis=has_channel_axis, - shape=tuple(level_shape), + shape=level_shape, # type: ignore (false positive) datatype=type, ) @@ -206,15 +207,27 @@ def create( handle = _tdb_handles.MultiscaleImageWrapper.open( uri, "w", context, tiledb_timestamp ) + handle.metadata[SOMA_SPATIAL_VERSION_METADATA_KEY] = ( + SOMA_SPATIAL_ENCODING_VERSION + ) handle.metadata[SOMA_MULTISCALE_IMAGE_SCHEMA] = _image_meta_str handle.metadata[SOMA_COORDINATE_SPACE_METADATA_KEY] = coord_space_str - return cls( + multiscale = cls( handle, _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", ) except SOMAError as e: raise map_exception_for_create(e, uri) from None + multiscale.add_new_level( + level_key, + uri=level_uri, + shape=level_shape, + platform_config=platform_config, + ) + + return multiscale + def __init__( self, handle: _tdb_handles.SOMAGroupWrapper[Any], @@ -260,13 +273,14 @@ def __init__( image_meta = _MultiscaleImageMetadata.from_json(metadata_json) self._data_axis_permutation = image_meta.data_axis_permutation self._has_channel_axis = image_meta.has_channel_axis - self._shape = image_meta.shape self._datatype = image_meta.datatype # Get the image levels. # TODO: Optimize and push down to C++ level self._levels = [ - _LevelProperties(name=key[len(self._level_prefix) :], shape=val) + _LevelProperties( + name=key[len(self._level_prefix) :], shape=tuple(json.loads(val)) + ) for key, val in self.metadata.items() if key.startswith(self._level_prefix) ] @@ -310,19 +324,19 @@ def add_new_level( f"{len(shape)} dimensions." ) - if self._has_channel_axis: + if self._has_channel_axis and len(self._levels) > 0: channel_index = self._data_axis_permutation.index(len(self._coord_space)) - expected_nchannel = self._levels[0].shape[channel_index] - actual_nchannel = shape[channel_index] - if actual_nchannel != expected_nchannel: + expected_nchannels = self._levels[0].shape[channel_index] + actual_nchannels = shape[channel_index] + if actual_nchannels != expected_nchannels: raise ValueError( - f"New level must have {expected_nchannel}, but provided shape has " - f"{actual_nchannel} channels." + f"New level must have {expected_nchannels}, but provided shape has " + f"{actual_nchannels} channels." ) # Add the level properties to level list. # Note: The names are guaranteed to be different from the earlier checks. - props = _LevelProperties(name=key, shape=shape) + props = _LevelProperties(name=key, shape=shape) # type: ignore (false positive) for index, other in enumerate(self._levels): # Note: Name is unique, so guaranteed to be strict ordering. 
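            # Levels stay sorted from largest to smallest shape, with the level
            # name as a tie-breaker, so index 0 is always the base resolution.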
if tuple(-val for val in props.shape) + (props.name,) < tuple( @@ -594,11 +608,16 @@ def get_transform_from_level(self, level: Union[int, str]) -> ScaleTransform: """ level_shape = self._level_properties(level).shape base_shape = self._levels[0].shape + coord_indexer = sorted( + range(len(self._data_axis_permutation)), + key=lambda index: self._data_axis_permutation[index], + ) scale_factors = [ base_shape[index] / level_shape[index] - for index in self._data_axis_permutation + for index in coord_indexer if index != len(self._coord_space) ] + return ScaleTransform( input_axes=self._coord_space.axis_names, output_axes=self._coord_space.axis_names, @@ -614,9 +633,13 @@ def get_transform_to_level(self, level: Union[int, str]) -> ScaleTransform: """ level_shape = self._level_properties(level).shape base_shape = self._levels[0].shape + coord_indexer = sorted( + range(len(self._data_axis_permutation)), + key=lambda index: self._data_axis_permutation[index], + ) scale_factors = [ level_shape[index] / base_shape[index] - for index in self._data_axis_permutation + for index in coord_indexer if index != len(self._coord_space) ] return ScaleTransform( @@ -660,7 +683,7 @@ def level_shape(self, level: Union[int, str]) -> Tuple[int, ...]: return self._levels[level].shape @property - def nchannel(self) -> int: + def nchannels(self) -> int: if self._has_channel_axis: index = self._data_axis_permutation.index(len(self._coord_space)) return self._levels[0].shape[index] From cc72a4f2d39cc437edfa143b03427d3363b9f041 Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Tue, 29 Oct 2024 17:33:46 -0400 Subject: [PATCH 05/10] (WIP) Update/fix multiscale image tests --- apis/python/tests/test_multiscale_image.py | 162 +++++++++------------ 1 file changed, 69 insertions(+), 93 deletions(-) diff --git a/apis/python/tests/test_multiscale_image.py b/apis/python/tests/test_multiscale_image.py index 3d8763db55..35de4ae561 100644 --- a/apis/python/tests/test_multiscale_image.py +++ b/apis/python/tests/test_multiscale_image.py @@ -17,8 +17,8 @@ def test_multiscale_image_bad_create(tmp_path): soma.MultiscaleImage.create( baseuri, type=pa.string(), - axis_names=("x", "y", "y"), - reference_level_shape=(3, 64, 64), + coordinate_space=("x", "y", "y"), + level_shape=(3, 64, 64), ) # Repeated axis names. @@ -26,46 +26,39 @@ def test_multiscale_image_bad_create(tmp_path): soma.MultiscaleImage.create( baseuri, type=pa.uint8(), - axis_names=("x", "y", "y"), - reference_level_shape=(3, 64, 64), + coordinate_space=("x", "y", "y"), + level_shape=(3, 64, 64), ) - # Repeated axis types. + # Repeated data axis order, bad length. with pytest.raises(ValueError): soma.MultiscaleImage.create( baseuri, type=pa.uint8(), - axis_types=("height", "height", "width"), - reference_level_shape=(3, 64, 64), + data_axis_order=("x", "x", "y"), + level_shape=(3, 64, 64), ) - # Invalid axis type. + # Invalid data axis order. with pytest.raises(ValueError): soma.MultiscaleImage.create( baseuri, type=pa.uint8(), - axis_types=("c", "height", "width"), - reference_level_shape=(3, 64, 64), + data_axis_order=("soma_channel", "y", "bad-axis-name"), + level_shape=(3, 64, 64), ) - # Mismatch in number of axis names and reference shape. 
+ # Data order has `soma_channel` when not expected with pytest.raises(ValueError): soma.MultiscaleImage.create( baseuri, type=pa.uint8(), - axis_types=("x", "y", "y"), - reference_level_shape=(3, 64, 64), + data_axis_order=("soma_channel", "y", "x"), + level_shape=(3, 64, 64), + has_channel_axis=False, ) - # Mismatch in number of axis names and axis types. - with pytest.raises(ValueError): - soma.MultiscaleImage.create( - baseuri, - type=pa.uint8(), - reference_level_shape=(64, 64), - axis_names=("y", "x"), - axis_types=("channels",), - ) + # TODO: Add more checks def test_multiscale_basic(tmp_path): @@ -76,18 +69,14 @@ def test_multiscale_basic(tmp_path): with soma.MultiscaleImage.create( image_uri, type=pa.uint8(), - reference_level_shape=(128, 64), - axis_names=("y", "x"), - axis_types=("height", "width"), + level_shape=(128, 64), + has_channel_axis=False, ) as image: - # Create reference level. - image.add_new_level("level0", shape=(128, 64)) - - # Create medium sized downsample. + # Add medium sized downsample. image.add_new_level("level1", shape=(64, 32)) - # Create very small downsample and write to it. + # Add very small downsample and write to it. level2 = image.add_new_level("level2", shape=(8, 4)) level2_data = pa.Tensor.from_numpy(np.arange(32, dtype=np.uint8).reshape(8, 4)) level2.write((slice(None), slice(None)), level2_data) @@ -96,15 +85,9 @@ def test_multiscale_basic(tmp_path): with soma.MultiscaleImage.open(image_uri, mode="r") as image: # Check the base properties for the image. - assert image.axis_names == ("y", "x") - base_props = image.reference_level_properties - assert base_props.name == "reference_level" - assert base_props.shape == (128, 64) - assert base_props.height == 128 - assert base_props.width == 64 - assert base_props.nchannels is None - assert base_props.depth is None - assert base_props.image_type == "YX" + assert image.level_shape(0) == (128, 64) + assert image.nchannels == 1 + assert image.data_axis_order == ("y", "x") # Check coordinate space. coord_space = image.coordinate_space @@ -114,15 +97,7 @@ def test_multiscale_basic(tmp_path): # Check the number of levels and level properties. assert image.level_count == 3 for index, shape in enumerate([(128, 64), (64, 32), (8, 4)]): - props = image.level_properties(index) - assert props == image.level_properties(props.name) - assert props.nchannels is None - assert props.depth is None - assert props.image_type == "YX" - assert props.name == f"level{index}" - assert props.shape == shape - assert props.height == shape[0] - assert props.width == shape[1] + assert image.level_shape(index) == shape # Check a basic read assert level2_data == image.read_spatial_region(2).data @@ -162,13 +137,11 @@ def image_uri(self, tmp_path_factory): with soma.MultiscaleImage.create( image_uri, type=pa.uint8(), - reference_level_shape=(1, 9, 8), - axis_names=("c", "y", "x"), - axis_types=("channel", "height", "width"), + level_shape=(1, 9, 8), ) as image: coords = (slice(None), slice(None), slice(None)) # Create levels. 
- l0 = image.add_new_level("level0", shape=(1, 9, 8)) + l0 = image["level0"] l0.write( coords, pa.Tensor.from_numpy(np.arange(72, dtype=np.uint8).reshape(1, 9, 8)), @@ -228,108 +201,111 @@ def test_read_spatial_region( ) -def create_multiscale(baseuri, axis_names, axis_types, shapes): +def create_multiscale(baseuri, coord_space, axis_order, has_channel_axis, shapes): image_uri = urljoin(baseuri, "default") with soma.MultiscaleImage.create( image_uri, type=pa.uint8(), - axis_names=axis_names, - axis_types=axis_types, - reference_level_shape=shapes[0], + coordinate_space=coord_space, + data_axis_order=axis_order, + has_channel_axis=has_channel_axis, + level_shape=shapes[0], ) as image: - for i in range(len(shapes)): + for i in range(1, len(shapes)): image.add_new_level(f"level{i}", shape=shapes[i]) return image_uri @pytest.mark.parametrize( - "axis_names, axis_types, shapes, expected_scale_factors", + "coord_space, axis_order, has_channel_axis, shapes, expected_scale_factors", [ [ - ("C", "Z", "Y", "X"), - ("channel", "depth", "height", "width"), + ("x", "y", "z"), + ("soma_channel", "z", "y", "x"), + True, ((128, 64, 32, 16), (128, 32, 16, 8), (128, 16, 8, 4), (128, 4, 2, 1)), ([1, 1, 1], [2, 2, 2], [4, 4, 4], [16, 16, 16]), ], [ - ("C", "Z", "Y", "X"), - ("channel", "depth", "height", "width"), + ("x", "y", "z"), + ("soma_channel", "z", "y", "x"), + True, ((128, 64, 32, 16), (128, 32, 16, 8), (128, 16, 8, 4)), ([1, 1, 1], [2, 2, 2], [4, 4, 4]), ], [ - ("C", "Z", "Y", "X"), - ("channel", "depth", "height", "width"), + ("x", "y", "z"), + ("soma_channel", "z", "y", "x"), + True, ((64, 64, 64, 64), (64, 64, 64, 64)), ([1, 1, 1], [1, 1, 1]), ], [ - ("C", "Z", "Y", "X"), - ("channel", "depth", "height", "width"), + ("x", "y", "z"), + ("soma_channel", "z", "y", "x"), + True, ((64, 32, 16, 8), (64, 16, 8, 4)), ([1, 1, 1], [2, 2, 2]), ], [ - ("C", "Z", "Y", "X"), - ("channel", "depth", "height", "width"), + ("x", "y", "z"), + ("soma_channel", "z", "y", "x"), + True, ((128, 64, 32, 16), (128, 32, 32, 8), (128, 16, 16, 4)), ([1, 1, 1], [2, 1, 2], [4, 2, 4]), ], [ - ("C", "Y", "X"), - ("channel", "height", "width"), + ("x", "y"), + ("soma_channel", "y", "x"), + True, ((128, 64, 32), (128, 32, 16), (128, 16, 8)), ([1, 1], [2, 2], [4, 4]), ], [ - ("C", "Y", "X"), - ("channel", "height", "width"), + ("x", "y"), + ("soma_channel", "y", "x"), + True, ((128, 128, 128), (128, 128, 128)), ([1, 1], [1, 1]), ], [ - ("Y", "X"), - ("height", "width"), + ("x", "y"), + ("y", "x"), + False, ((128, 128), (128, 128)), ([1, 1], [1, 1]), ], [ - ("Y", "X"), - ("height", "width"), + ("x", "y"), + ("y", "x"), + False, ((128, 64), (64, 32)), ([1, 1], [2, 2]), ], [ - ("Y", "X"), - ("height", "width"), + ("x", "y"), + ("y", "x"), + False, ((60, 30), (30, 6)), ([1, 1], [5, 2]), ], ], ) def test_multiscale_with_axis_names( - tmp_path, axis_names, axis_types, shapes, expected_scale_factors + tmp_path, coord_space, axis_order, has_channel_axis, shapes, expected_scale_factors ): baseuri = urljoin(f"{tmp_path.as_uri()}/", "test_multiscale_with_axis_names") - image_uri = create_multiscale(baseuri, axis_names, axis_types, shapes) + image_uri = create_multiscale( + baseuri, coord_space, axis_order, has_channel_axis, shapes + ) with soma.MultiscaleImage.open(image_uri, mode="r") as image: - base_props = image.reference_level_properties - assert base_props.shape == shapes[0] assert image.level_count == len(shapes) + # TODO Check channels + for index, shape in enumerate(shapes): - props = image.level_properties(index) - assert props.name 
== f"level{index}" - assert props == image.level_properties(props.name) - assert props.image_type == "".join(axis_names) - assert props.shape == shape - - for i, axis_type in enumerate(axis_types): - if axis_type == "channel": - assert getattr(props, "nchannels") == shape[i] - else: - assert getattr(props, axis_type) == shape[i] + assert shape == image.level_shape(index) # Check transform to and from levels assert np.array_equal( From 348d264a83ffe4de6712579b6099ad076c5b8b0b Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Wed, 30 Oct 2024 10:38:20 -0400 Subject: [PATCH 06/10] Fix type ignore comments and get transform methods --- .../src/tiledbsoma/_multiscale_image.py | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/apis/python/src/tiledbsoma/_multiscale_image.py b/apis/python/src/tiledbsoma/_multiscale_image.py index 275f86b45c..204bc86b3f 100644 --- a/apis/python/src/tiledbsoma/_multiscale_image.py +++ b/apis/python/src/tiledbsoma/_multiscale_image.py @@ -9,7 +9,7 @@ import json import warnings -from typing import Any, Optional, Sequence, Tuple, Union +from typing import Any, List, Optional, Sequence, Tuple, Union import attrs import pyarrow as pa @@ -187,10 +187,12 @@ def create( raise ValueError() # TODO: Add error axis_permutation = tuple(axis_indices[name] for name in data_axis_order) + # The type ignore comments are to address a false positive in the attrs tuple + # constructor. image_meta = _MultiscaleImageMetadata( - data_axis_permutation=axis_permutation, # type: ignore (false positive) + data_axis_permutation=axis_permutation, # type: ignore[arg-type] has_channel_axis=has_channel_axis, - shape=level_shape, # type: ignore (false positive) + shape=level_shape, # type: ignore[arg-type] datatype=type, ) @@ -336,7 +338,8 @@ def add_new_level( # Add the level properties to level list. # Note: The names are guaranteed to be different from the earlier checks. - props = _LevelProperties(name=key, shape=shape) # type: ignore (false positive) + # Type ignore is for a false positive in the attrs tuple constructor. + props = _LevelProperties(name=key, shape=shape) # type: ignore[arg-type] for index, other in enumerate(self._levels): # Note: Name is unique, so guaranteed to be strict ordering. if tuple(-val for val in props.shape) + (props.name,) < tuple( @@ -537,11 +540,7 @@ def read_spatial_region( # Metadata operations def _level_properties(self, level: Union[int, str]) -> _LevelProperties: - """The properties of an image at the specified level. - - Lifecycle: - Experimental. - """ + """The properties of an image at the specified level.""" # by name # TODO could dyanmically create a dictionary whenever a name-based # lookup is requested @@ -555,6 +554,15 @@ def _level_properties(self, level: Union[int, str]) -> _LevelProperties: # by index return self._levels[level] + def _axis_order(self) -> List[int]: + """Indices for accessing the data order for spatial axes.""" + axes = [ + index + for index in range(len(self._data_axis_permutation)) + if self._data_axis_permutation[index] != len(self._coord_space) + ] + return sorted(axes, key=lambda index: self._data_axis_permutation[index]) + @property def coordinate_space(self) -> CoordinateSpace: """Coordinate space for this multiscale image. 
@@ -608,16 +616,10 @@ def get_transform_from_level(self, level: Union[int, str]) -> ScaleTransform: """ level_shape = self._level_properties(level).shape base_shape = self._levels[0].shape - coord_indexer = sorted( - range(len(self._data_axis_permutation)), - key=lambda index: self._data_axis_permutation[index], - ) + axis_indexer = self._axis_order() scale_factors = [ - base_shape[index] / level_shape[index] - for index in coord_indexer - if index != len(self._coord_space) + base_shape[index] / level_shape[index] for index in axis_indexer ] - return ScaleTransform( input_axes=self._coord_space.axis_names, output_axes=self._coord_space.axis_names, @@ -633,14 +635,9 @@ def get_transform_to_level(self, level: Union[int, str]) -> ScaleTransform: """ level_shape = self._level_properties(level).shape base_shape = self._levels[0].shape - coord_indexer = sorted( - range(len(self._data_axis_permutation)), - key=lambda index: self._data_axis_permutation[index], - ) + axis_indexer = self._axis_order() scale_factors = [ - level_shape[index] / base_shape[index] - for index in coord_indexer - if index != len(self._coord_space) + level_shape[index] / base_shape[index] for index in axis_indexer ] return ScaleTransform( input_axes=self._coord_space.axis_names, From b71bee0c35c3d25f181eb0e1dad111b5dae8cae3 Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Wed, 30 Oct 2024 10:45:24 -0400 Subject: [PATCH 07/10] Fix process_image_region coord construction --- apis/python/src/tiledbsoma/_spatial_util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apis/python/src/tiledbsoma/_spatial_util.py b/apis/python/src/tiledbsoma/_spatial_util.py index 163050b078..a16a364383 100644 --- a/apis/python/src/tiledbsoma/_spatial_util.py +++ b/apis/python/src/tiledbsoma/_spatial_util.py @@ -160,11 +160,11 @@ def process_image_region( for axis in data_order: if axis == len(inv_transform.input_axes): coords.append(channel_coords) # type: ignore[attr-defined] - if axis == 0: + elif axis == 0: coords.append(x_coords) # type: ignore[attr-defined] - if axis == 1: + elif axis == 1: coords.append(y_coords) # type: ignore[attr-defined] - if axis == 2: + elif axis == 2: raise NotImplementedError( "Spatial queries are currently only supported for 2D coordinates." ) From 866cf7de772d1f22ed9a64c5bedd1f8aa294cbd9 Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Wed, 30 Oct 2024 10:46:34 -0400 Subject: [PATCH 08/10] Clean-up and fix tests --- apis/python/tests/test_multiscale_image.py | 31 +++++++++------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/apis/python/tests/test_multiscale_image.py b/apis/python/tests/test_multiscale_image.py index 35de4ae561..b09dae9de3 100644 --- a/apis/python/tests/test_multiscale_image.py +++ b/apis/python/tests/test_multiscale_image.py @@ -307,23 +307,18 @@ def test_multiscale_with_axis_names( for index, shape in enumerate(shapes): assert shape == image.level_shape(index) - # Check transform to and from levels - assert np.array_equal( - image.get_transform_to_level(index).scale_factors, - 1 / np.array(expected_scale_factors[index]), - ) - assert np.array_equal( - image.get_transform_to_level(f"level{index}").scale_factors, - 1 / np.array(expected_scale_factors[index]), - ) - assert np.array_equal( - image.get_transform_from_level(index).scale_factors, - expected_scale_factors[index], - ) - assert np.array_equal( - image.get_transform_from_level(f"level{index}").scale_factors, - expected_scale_factors[index], - ) + # Check transform to levels. 
+ expected = 1 / np.array(expected_scale_factors[index]) + actual = image.get_transform_to_level(index).scale_factors + assert np.array_equal(actual, expected) + assert np.array_equal(actual, expected) + + # Check transform from levels + expected = expected_scale_factors[index] + actual = image.get_transform_from_level(index).scale_factors + assert np.array_equal(actual, expected) + actual = image.get_transform_from_level(f"level{index}").scale_factors + assert np.array_equal(actual, expected) @pytest.mark.parametrize( @@ -370,7 +365,7 @@ def test_multiscale_with_axis_names( ) def test_multiscale_2d_read_region(tmp_path, shapes, region, scale_factors): baseuri = urljoin(f"{tmp_path.as_uri()}/", "test_multiscale_read_region") - image_uri = create_multiscale(baseuri, ("Y", "X"), ("height", "width"), shapes) + image_uri = create_multiscale(baseuri, ("x", "y"), ("y", "x"), False, shapes) with soma.Collection.open(image_uri, mode="w") as image: for i, shape in enumerate(shapes): From b25f72c0473c6b523bd1c278ca3655441e52008e Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Wed, 30 Oct 2024 10:59:44 -0400 Subject: [PATCH 09/10] Update args in Scene add_multiscale_image tests --- apis/python/tests/test_scene.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apis/python/tests/test_scene.py b/apis/python/tests/test_scene.py index 9682d3dd09..aced5b4f9f 100644 --- a/apis/python/tests/test_scene.py +++ b/apis/python/tests/test_scene.py @@ -431,7 +431,7 @@ def test_scene_multiscale_image(tmp_path): "img", transform=transform, type=pa.int64(), - reference_level_shape=[1, 2, 3], + level_shape=[1, 2, 3], ) # The scene coordinate space must be set before registering @@ -455,7 +455,7 @@ def test_scene_multiscale_image(tmp_path): "img", transform=bad_transform, type=pa.int64(), - reference_level_shape=[1, 2, 3], + level_shape=[1, 2, 3], ) # Mismatch in transform output axes and multiscale image coordinate space axes. @@ -470,7 +470,7 @@ def test_scene_multiscale_image(tmp_path): "img", transform=bad_transform, type=pa.int64(), - reference_level_shape=[1, 2, 3], + level_shape=[1, 2, 3], ) # Add the multiscale image. @@ -479,7 +479,7 @@ def test_scene_multiscale_image(tmp_path): "img", transform=transform, type=pa.int64(), - reference_level_shape=[1, 2, 3], + level_shape=[1, 2, 3], ) # Check the transform. 
@@ -521,7 +521,7 @@ def test_scene_set_transfrom_to_multiscale_image( "img", transform=None, type=pa.int64(), - reference_level_shape=[3, 8, 9], + level_shape=[3, 8, 9], ) transform = coord_transform( From 8878e1bce44204e1ca2badd11e70f6d09b687d0c Mon Sep 17 00:00:00 2001 From: Julia Dark Date: Wed, 30 Oct 2024 14:13:22 -0400 Subject: [PATCH 10/10] Update spatial notebook --- apis/python/notebooks/tutorial_spatial.ipynb | 135 +++++++------------ 1 file changed, 52 insertions(+), 83 deletions(-) diff --git a/apis/python/notebooks/tutorial_spatial.ipynb b/apis/python/notebooks/tutorial_spatial.ipynb index 600639392f..3da57c05f7 100644 --- a/apis/python/notebooks/tutorial_spatial.ipynb +++ b/apis/python/notebooks/tutorial_spatial.ipynb @@ -151,7 +151,15 @@ "execution_count": 6, "id": "e27b622a-a1d0-40c2-bea7-cc9a9bc80596", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "data/CytAssist_FFPE_Protein_Expression_Human_Glioblastoma/soma exists; skipping 10x data download\n" + ] + } + ], "source": [ "paths = {\n", " filtered_h5: 'filtered_feature_bc_matrix.h5',\n", @@ -179,17 +187,7 @@ "execution_count": 7, "id": "f8f71d9d-cdaf-4b9f-81f8-39473e930a7e", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Ingesting data/CytAssist_FFPE_Protein_Expression_Human_Glioblastoma/10x to data/CytAssist_FFPE_Protein_Expression_Human_Glioblastoma/soma\n", - "/tmp/ipykernel_21706/2080821725.py:3: UserWarning: Support for spatial types is experimental. Changes to both the API and data storage may not be backwards compatible.\n", - " from_visium(\n" - ] - } - ], + "outputs": [], "source": [ "if not exists(exp_uri):\n", " err(f\"Ingesting {data_dir_10x} to {exp_uri}\")\n", @@ -473,35 +471,6 @@ "tissue_image.level_count" ] }, - { - "cell_type": "markdown", - "id": "ac620b04-17e9-4b7b-acb5-5aea9c5a2102", - "metadata": {}, - "source": [ - "The `MultiscaleImage` has reference properties that are used to get the scale for the different levels." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "f3479a9e-bb32-459f-9f61-82b953f0ef0e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ImageProperties(name='reference_level', image_type='CYX', shape=(3, 2000, 1744), width=1744, height=2000, depth=None, nchannels=3)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tissue_image.reference_level_properties" - ] - }, { "cell_type": "markdown", "id": "b9d2ea73-0bd7-4631-9759-5420200a3bfd", @@ -512,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "0f2e9150-0734-402b-957e-19f84548bf8b", "metadata": {}, "outputs": [ @@ -520,14 +489,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Level 0: ImageProperties(name='hires', image_type='CYX', shape=(3, 2000, 1744), width=1744, height=2000, depth=None, nchannels=3)\n", - "Level 1: ImageProperties(name='lowres', image_type='CYX', shape=(3, 600, 523), width=523, height=600, depth=None, nchannels=3)\n" + "Level 0 shape: (3, 2000, 1744)\n", + "Level 1 shape: (3, 600, 523)\n" ] } ], "source": [ "for level in range(tissue_image.level_count):\n", - " print(f\"Level {level}: {tissue_image.level_properties(level)}\")" + " print(f\"Level {level} shape: {tissue_image.level_shape(level)}\")" ] }, { @@ -540,7 +509,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "949c159b-52d6-4348-9e23-a22d28e7dfa5", "metadata": {}, "outputs": [ @@ -550,7 +519,7 @@ "" ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -562,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "c445af0d-b3ac-4a2b-a15c-f7e6a25dffeb", "metadata": {}, "outputs": [], @@ -572,17 +541,17 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "5a8c2ca2-11d7-4856-b537-474dd15269e6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, @@ -613,7 +582,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "ddf8a050-aef5-414a-a29d-11b290cac452", "metadata": {}, "outputs": [], @@ -623,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "4e86469c-a466-4e19-9f02-df88a94b1a18", "metadata": {}, "outputs": [ @@ -633,7 +602,7 @@ "CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels')))" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -644,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "b3c0d0db-da05-4a38-a114-07f1617cf1d9", "metadata": {}, "outputs": [ @@ -824,7 +793,7 @@ "[5756 rows x 7 columns]" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -836,7 +805,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "943f2ff9-4832-4ee1-b0e1-07674660f5d5", "metadata": {}, "outputs": [ @@ -943,7 +912,7 @@ "[5756 rows x 2 columns]" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -955,7 +924,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "7adb1345-9386-44d2-9fc7-10455ae9152b", "metadata": {}, "outputs": [ @@ -967,7 +936,7 @@ "Name: count, 
dtype: int64" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -988,7 +957,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "id": "2260b4bf-60be-4c30-985d-4b85bb2bab27", "metadata": {}, "outputs": [], @@ -1002,7 +971,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "id": "302577ff-4f75-4eeb-910f-aa1b51d1d075", "metadata": {}, "outputs": [ @@ -1047,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "id": "26593cd4-1bcf-472c-88ba-d9d305ee5720", "metadata": {}, "outputs": [ @@ -1060,7 +1029,7 @@ " scales: [0.07091737 0.07091696]>" ] }, - "execution_count": 28, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1071,7 +1040,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "id": "6a984e2a-8532-474b-82c8-bcc489f99f71", "metadata": {}, "outputs": [ @@ -1084,7 +1053,7 @@ " scales: [0.02126708 0.02127509]>" ] }, - "execution_count": 29, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1095,7 +1064,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "id": "8fa549eb-598d-4a0c-9ab9-dd1bfac4199b", "metadata": {}, "outputs": [ @@ -1107,7 +1076,7 @@ " output axes: ('x', 'y')>" ] }, - "execution_count": 30, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1130,7 +1099,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "id": "18751a3c-0e29-4e00-ba58-3529e7164f6c", "metadata": {}, "outputs": [], @@ -1143,7 +1112,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "id": "02d18d4b-0caf-4fd5-ab6f-a0c8b2122069", "metadata": {}, "outputs": [ @@ -1162,7 +1131,7 @@ " [0.00000000e+00 0.00000000e+00 1.00000000e+00]]>)" ] }, - "execution_count": 32, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1174,7 +1143,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "id": "42fa7736-659c-49e5-8219-09d0b0832778", "metadata": {}, "outputs": [ @@ -1186,7 +1155,7 @@ " [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])" ] }, - "execution_count": 33, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1205,19 +1174,19 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 33, "id": "e136c1b9-3b7f-44ed-aa20-e2abdb1868e1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "SpatialRead(data=, data_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), output_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), coordinate_transform=, data_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), output_coordinate_space=CoordinateSpace(axes=(Axis(name='x', unit='pixels'), Axis(name='y', unit='pixels'))), coordinate_transform=)" ] }, - "execution_count": 34, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1229,7 +1198,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 34, "id": "53554033-a469-4126-88a3-bd9cec9fd78f", "metadata": {}, "outputs": [ @@ -1409,7 +1378,7 @@ "[928 rows x 7 columns]" ] }, - "execution_count": 35, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1421,7 +1390,7 @@ }, { "cell_type": "code", - "execution_count": 
36, + "execution_count": 35, "id": "34a51b21-2d77-47c2-a3e1-bf159cd1b001", "metadata": {}, "outputs": [ @@ -1433,7 +1402,7 @@ " [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])" ] }, - "execution_count": 36, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1445,7 +1414,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 36, "id": "c7c22f26-613c-44d9-9ce9-7abc64b377f9", "metadata": {}, "outputs": [], @@ -1458,7 +1427,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 37, "id": "021cf9b5-9880-4857-8164-326dc26d8b6d", "metadata": {}, "outputs": [], @@ -1478,7 +1447,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 38, "id": "1a67bc46-f768-4a16-8324-828c5a38bd57", "metadata": {}, "outputs": [