From 9cb6f801c191f6a9f80eaaecda4a6fe9925b7ae9 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Wed, 18 Sep 2024 16:50:24 -0400 Subject: [PATCH 01/17] Add classes for coordinate spaces and transforms (#218) New classes: * Axis: simple data class storing a name, optional units, and optional scale * CoordinateSpace: used to define coordinate spaces for spatial data * CoordinateTransform: abstract base class for coordinate transforms * AffineTransform: implementation for a generic affine coordinate transform * ScaleTransform: implementation for a coordinate transform that scales axes (may be isotropic/uniform) * IdentityTransform: implementation for an identity transform that only applies a name change to axes --------- Co-authored-by: paul fisher --- python-spec/src/somacore/__init__.py | 14 + python-spec/src/somacore/coordinates.py | 381 ++++++++++++++++++++++++ python-spec/src/somacore/types.py | 30 +- python-spec/testing/test_coordinates.py | 306 +++++++++++++++++++ 4 files changed, 730 insertions(+), 1 deletion(-) create mode 100644 python-spec/src/somacore/coordinates.py create mode 100644 python-spec/testing/test_coordinates.py diff --git a/python-spec/src/somacore/__init__.py b/python-spec/src/somacore/__init__.py index 6731f5e3..9946871d 100644 --- a/python-spec/src/somacore/__init__.py +++ b/python-spec/src/somacore/__init__.py @@ -16,6 +16,13 @@ from .base import SOMAObject from .collection import Collection +from .coordinates import AffineTransform +from .coordinates import Axis +from .coordinates import CoordinateSpace +from .coordinates import CoordinateTransform +from .coordinates import IdentityTransform +from .coordinates import ScaleTransform +from .coordinates import UniformScaleTransform from .data import DataFrame from .data import DenseNDArray from .data import NDArray @@ -61,4 +68,11 @@ "AxisQuery", "ExperimentAxisQuery", "ContextBase", + "Axis", + "CoordinateSpace", + "CoordinateTransform", + 
"AffineTransform", + "ScaleTransform", + "UniformScaleTransform", + "IdentityTransform", ) diff --git a/python-spec/src/somacore/coordinates.py b/python-spec/src/somacore/coordinates.py new file mode 100644 index 00000000..986ed83e --- /dev/null +++ b/python-spec/src/somacore/coordinates.py @@ -0,0 +1,381 @@ +"""Definitions of types related to coordinate systems.""" + +import abc +import collections.abc +from typing import Optional, Sequence, Tuple, Union + +import attrs +import numpy as np +import numpy.typing as npt + +from .types import str_or_seq_length +from .types import to_string_tuple + + +@attrs.define(frozen=True) +class Axis: + """A description of an axis of a coordinate system + + Lifecycle: experimental + """ + + name: str + """Name of the axis.""" + unit: Optional[str] = None + """Optional string name for the units of the axis.""" + + +@attrs.define(frozen=True) +class CoordinateSpace( + collections.abc.Sequence +): # Change to Sequence[Axis] after 3.8 is dropped. + """A coordinate space for spatial data. + + Args: + axes: The axes of the coordinate system in order. + + Lifecycle: experimental + """ + + axes: Tuple[Axis, ...] = attrs.field(converter=tuple) + + @axes.validator + def _validate(self, _, axes: Tuple[Axis, ...]) -> None: + if not axes: + raise ValueError("The coordinate space must have at least one axis.") + if len(set(axis.name for axis in self.axes)) != len(axes): + raise ValueError("The names for the axes must be unique.") + + def __len__(self) -> int: + return len(self.axes) + + def __getitem__(self, index: int) -> Axis: # type: ignore[override] + return self.axes[index] + + @property + def axis_names(self) -> Tuple[str, ...]: + """The names of the axes in order. + + Lifecycle: experimental + """ + return tuple(axis.name for axis in self.axes) + + +class CoordinateTransform(metaclass=abc.ABCMeta): + """A coordinate transformation from one coordinate space to another. 
+ + Args: + input_axes: The names of the axes for the input coordinate space. + output_axes: The names of the axes for the output coordinate space. + + CoordinateTransform classes are composable using the ``@`` (__matmul__) operator. + + Lifecycle: experimental + """ + + def __init__( + self, + input_axes: Union[str, Sequence[str]], + output_axes: Union[str, Sequence[str]], + ): + self._input_axes = to_string_tuple(input_axes) + self._output_axes = to_string_tuple(output_axes) + + def _check_matmul_inner_axes(self, other: "CoordinateTransform"): + """Throws a ``ValueError`` if ``self @ other`` has mismatched axes.""" + if self.input_axes != other.output_axes: + raise ValueError( + f"Input axes of {type(self).__name__} must match output axes of " + f"{type(other).__name__}." + ) + + def _check_rmatmul_inner_axes(self, other: "CoordinateTransform"): + """Throws a ``ValueError `` if ``other @ self`` has mismatched axes.""" + if self.output_axes != other.input_axes: + raise ValueError( + f"Input axes of {type(other).__name__} must match output axes of " + f"{type(self).__name__}." + ) + + @abc.abstractmethod + def __matmul__(self, other: object) -> "CoordinateTransform": + raise NotImplementedError() + + @abc.abstractmethod + def inverse_transform(self) -> "CoordinateTransform": + """Returns the inverse coordinate transform. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + def input_axes(self) -> Tuple[str, ...]: + """The names of the axes of the input coordinate space. + + Lifecycle: experimental + """ + return self._input_axes + + @property + def output_axes(self) -> Tuple[str, ...]: + """The names of the axes of the output coordinate space. + + Lifecycle: experimental + """ + return self._output_axes + + +class AffineTransform(CoordinateTransform): + """An affine coordinate trasformation from one coordinate space to another. + + An affine transform is a combination of a linear transformation and a translation. 
+ + Args: + input_axes: The names of the axes for the input coordinate space. + output_axes: The names of the axes for the output coordinate space. + matrix: Matrix (perhaps augmented) that represents the affine transformation. + Can be provided as just the linear transform (if no translation), the + full augmented matrix, or the augmented matrix without the final row. + + Lifecycle: experimental + """ + + def __init__( + self, + input_axes: Union[str, Sequence[str]], + output_axes: Union[str, Sequence[str]], + matrix: npt.ArrayLike, + ): + super().__init__(input_axes, output_axes) + + # Check the rank of the input/output axes match. + if len(self.input_axes) != len(self.output_axes): + raise ValueError( + "The input axes and output axes must be the same length for an " + "affine transform." + ) + rank = len(self.input_axes) + + # Create and validate the augmented matrix. + self._matrix: npt.NDArray[np.float64] = np.array(matrix, dtype=np.float64) + if self._matrix.shape == (rank + 1, rank + 1): + if not ( + self._matrix[-1, -1] == 1.0 + and np.array_equal(self._matrix[-1, :-1], np.zeros((rank,))) + ): + raise ValueError( + f"Input matrix {self._matrix} has augmented matrix shape, but is not a valid " + f"augmented matrix." + ) + elif self._matrix.shape == (rank, rank + 1): + self._matrix = np.vstack( + ( + self._matrix, + np.hstack((np.zeros((rank,)), np.array([1]))), + ) + ) + elif self._matrix.shape == (rank, rank): + self._matrix = np.vstack( + ( + np.hstack((self._matrix, np.zeros((rank, 1)))), + np.hstack((np.zeros((rank,)), np.array([1]))), + ) + ) + else: + raise ValueError( + f"Unexpected shape {self._matrix.shape} for the input affine matrix." + ) + + def __matmul__(self, other: object) -> CoordinateTransform: + if not isinstance(other, CoordinateTransform): + raise NotImplementedError( + f"Matrix multiply is not implemented with type {type(other)!r}." 
+ ) + self._check_matmul_inner_axes(other) + if isinstance(other, IdentityTransform): + return AffineTransform(other.input_axes, self.output_axes, self._matrix) + if isinstance(other, AffineTransform): + return AffineTransform( + other.input_axes, + self.output_axes, + self.augmented_matrix @ other.augmented_matrix, + ) + raise NotImplementedError( + f"Cannot multiply a CoordinateTransform by type {type(other)!r}." + ) + + @property + def augmented_matrix(self) -> npt.NDArray[np.float64]: + """Returns the augmented affine matrix for the transformation. + + Lifecycle: experimental + """ + return self._matrix + + def inverse_transform(self) -> "AffineTransform": + """Returns the inverse coordinate transform. + + Lifecycle: experimental + """ + rank = len(self.output_axes) + inv_a = np.linalg.inv(self._matrix[:-1, :-1]) + b2 = -inv_a @ self._matrix[:-1, -1].reshape((rank, 1)) + inv_augmented: npt.NDArray[np.float64] = np.vstack( + ( + np.hstack((inv_a, b2)), + np.hstack((np.zeros(rank), np.array([1]))), + ) + ) + return AffineTransform(self.output_axes, self.input_axes, inv_augmented) + + +class ScaleTransform(AffineTransform): + """A scale coordinate transformation from one coordinate space to another. + + Args: + input_axes: The names of the axes for the input coordinate space. + output_axes: The names of the axes for the output coordinate space. + scale_factors: The scale factors for the transformation. There must be one + value per axis. + + Lifecycle: experimental + """ + + def __init__( + self, + input_axes: Union[str, Sequence[str]], + output_axes: Union[str, Sequence[str]], + scale_factors: npt.ArrayLike, + ): + rank = str_or_seq_length(input_axes) + self._scale_factors: npt.NDArray[np.float64] = np.array( + scale_factors, dtype=np.float64 + ) + if self._scale_factors.size != rank: + raise ValueError( + f"Scale factors have unexpected shape={self._scale_factors.shape} " + f"for a transform with rank={rank}." 
+ ) + self._scale_factors = self._scale_factors.reshape((rank,)) + + super().__init__(input_axes, output_axes, np.diag(self._scale_factors)) + + def __matmul__(self, other: object) -> CoordinateTransform: + if not isinstance(other, CoordinateTransform): + raise NotImplementedError( + f"Matrix multiply is not implemented with type {type(other)!r}." + ) + self._check_matmul_inner_axes(other) + if isinstance(other, ScaleTransform): + return ScaleTransform( + other.input_axes, + self.output_axes, + self.scale_factors * other.scale_factors, + ) + return super().__matmul__(other) + + def inverse_transform(self) -> "ScaleTransform": + """Returns the inverse coordinate transform. + + Lifecycle: experimental + """ + return ScaleTransform( + self.output_axes, self.input_axes, 1.0 / self._scale_factors + ) + + @property + def scale_factors(self) -> npt.NDArray[np.float64]: + """Returns the scale factors as an one-dimensional numpy array. + + Lifecycle: experimental + """ + return self._scale_factors + + +class UniformScaleTransform(ScaleTransform): + """A scale coordinate transformation from one coordinate space to another. + + Args: + input_axes: The names of the axes for the input coordinate space. + output_axes: The names of the axes for the output coordinate space. + scale: The scale factor for all axes. + + Lifecycle: experimental + """ + + def __init__( + self, + input_axes: Union[str, Sequence[str]], + output_axes: Union[str, Sequence[str]], + scale: Union[int, float, np.float64], + ): + self._scale = float(scale) + rank = str_or_seq_length(input_axes) + super().__init__(input_axes, output_axes, rank * [self._scale]) + + def __matmul__(self, other: object) -> CoordinateTransform: + if not isinstance(other, CoordinateTransform): + raise NotImplementedError( + f"Matrix multiply is not implemented with type {type(other)!r}." 
+ ) + if isinstance(other, UniformScaleTransform): + self._check_matmul_inner_axes(other) + return UniformScaleTransform( + other.input_axes, self.output_axes, self.scale * other.scale + ) + return super().__matmul__(other) + + def inverse_transform(self) -> "UniformScaleTransform": + """Returns the inverse coordinate transform. + + Lifecycle: experimental + """ + return UniformScaleTransform( + self.output_axes, self.input_axes, 1.0 / self._scale + ) + + @property + def scale(self) -> float: + """Returns the scale factor for the uniform scale transform. + + Lifecycle: experimental + """ + return self._scale + + +class IdentityTransform(UniformScaleTransform): + """The identify transform from one coordinate space to another. + + This transform only changes the name of the axes. + + Args: + input_axes: The names of the axes for the input coordinate space. + output_axes: The names of the axes for the output coordinate space. + + Lifecycle: experimental + """ + + def __init__( + self, + input_axes: Union[str, Sequence[str]], + output_axes: Union[str, Sequence[str]], + ): + super().__init__(input_axes, output_axes, 1) + + def __matmul__(self, other: object) -> CoordinateTransform: + if not isinstance(other, CoordinateTransform): + raise NotImplementedError( + f"Matrix multiply is not implemented with type {type(other)!r}." + ) + if isinstance(other, IdentityTransform): + self._check_matmul_inner_axes(other) + return IdentityTransform(other.input_axes, self.output_axes) + return super().__matmul__(other) + + def inverse_transform(self) -> "IdentityTransform": + """Returns the inverse coordinate transform. 
+ + Lifecycle: experimental + """ + return IdentityTransform(self.output_axes, self.input_axes) diff --git a/python-spec/src/somacore/types.py b/python-spec/src/somacore/types.py index 5a64153f..4902b8ed 100644 --- a/python-spec/src/somacore/types.py +++ b/python-spec/src/somacore/types.py @@ -7,7 +7,16 @@ import sys from concurrent import futures -from typing import TYPE_CHECKING, NoReturn, Optional, Sequence, Type, TypeVar +from typing import ( + TYPE_CHECKING, + NoReturn, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, +) from typing_extensions import Protocol, TypeGuard @@ -22,6 +31,25 @@ def is_nonstringy_sequence(it: object) -> TypeGuard[Sequence]: return not isinstance(it, (str, bytes)) and isinstance(it, Sequence) +def to_string_tuple(obj: Union[str, Sequence[str]]) -> Tuple[str, ...]: + """Returns a tuple of string values. + + If the input is a string, it is returned as a tuple with the string as its + only item. If it is otherwise a sequence of strings, the sequence is converted + to a tuple. + """ + return (obj,) if isinstance(obj, str) else tuple(obj) + + +def str_or_seq_length(obj: Union[str, Sequence[str]]) -> int: + """Returns the number of str values + + If input is a string, returns 1. Otherwise, returns the number of strings in the + sequence. 
+ """ + return 1 if isinstance(obj, str) else len(obj) + + _T = TypeVar("_T") _T_co = TypeVar("_T_co", covariant=True) diff --git a/python-spec/testing/test_coordinates.py b/python-spec/testing/test_coordinates.py new file mode 100644 index 00000000..96e80cca --- /dev/null +++ b/python-spec/testing/test_coordinates.py @@ -0,0 +1,306 @@ +import numpy as np +import pytest + +from somacore import AffineTransform +from somacore import CoordinateTransform +from somacore import IdentityTransform +from somacore import ScaleTransform +from somacore import UniformScaleTransform + + +def check_transform_is_equal( + actual: CoordinateTransform, desired: CoordinateTransform +) -> None: + assert actual.input_axes == desired.input_axes + assert actual.output_axes == desired.output_axes + if isinstance(desired, IdentityTransform): + assert isinstance(actual, IdentityTransform) + elif isinstance(desired, UniformScaleTransform): + assert isinstance(actual, UniformScaleTransform) + assert actual.scale == desired.scale + elif isinstance(desired, ScaleTransform): + assert isinstance(actual, ScaleTransform) + np.testing.assert_array_equal(actual.scale_factors, desired.scale_factors) + elif isinstance(desired, AffineTransform): + assert isinstance(actual, AffineTransform) + np.testing.assert_array_equal(actual.augmented_matrix, desired.augmented_matrix) + else: + assert False + + +@pytest.mark.parametrize( + ("input", "expected"), + [ + ( + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + [[2, 2, 0], [0, 3, 1]], + ), + np.array([[2, 2, 0], [0, 3, 1], [0, 0, 1]], np.float64), + ), + ( + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + [[2, 2], [0, 3]], + ), + np.array([[2, 2, 0], [0, 3, 0], [0, 0, 1]], np.float64), + ), + ( + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + [[2, 2, 0], [0, 3, 1], [0, 0, 1]], + ), + np.array([[2, 2, 0], [0, 3, 1], [0, 0, 1]], np.float64), + ), + ], +) +def test_affine_augmented_matrix(input, expected): + result = input.augmented_matrix + 
np.testing.assert_array_equal(result, expected) + + +@pytest.mark.parametrize( + ("input_matrix",), [([1, 2, 3],), ([[1, 0, 1], [0, 1, 1], [1, 0, 1]],)] +) +def test_affine_matrix_value_error(input_matrix): + with pytest.raises(ValueError): + AffineTransform(("x1", "y1"), ("x2", "y2"), input_matrix) + + +def test_bad_number_of_scale_factors(): + with pytest.raises(ValueError): + ScaleTransform(("x1", "y1"), ("x2", "y2"), [1, 2, 3]) + + +@pytest.mark.parametrize( + ("input", "expected"), + [ + ( + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + [[1, 0, 0], [0, 1, 0]], + ), + AffineTransform( + ["x2", "y2"], + ["x1", "y1"], + [[1, 0, 0], [0, 1, 0]], + ), + ), + ( + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + [[1, 0, 5], [0, 1, 10]], + ), + AffineTransform( + ["x2", "y2"], + ["x1", "y1"], + [[1, 0, -5], [0, 1, -10]], + ), + ), + ( + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + [[2, 0, -5], [0, 4, 5]], + ), + AffineTransform( + ["x2", "y2"], + ["x1", "y1"], + [[0.5, 0, 2.5], [0, 0.25, -1.25]], + ), + ), + ( + ScaleTransform(["x1", "y1"], ["x2", "y2"], [4, 0.1]), + ScaleTransform(["x2", "y2"], ["x1", "y1"], [0.25, 10]), + ), + ( + UniformScaleTransform(["x1", "y1"], ["x2", "y2"], 10), + UniformScaleTransform(["x2", "y2"], ["x1", "y1"], 0.1), + ), + ( + IdentityTransform(["x1", "y1"], ["x2", "y2"]), + IdentityTransform(["x2", "y2"], ["x1", "y1"]), + ), + ], +) +def test_inverse_transform(input, expected): + result = input.inverse_transform() + check_transform_is_equal(result, expected) + result_matrix = input.augmented_matrix @ result.augmented_matrix + expected_matrix: np.ndarray = np.identity( + len(result.input_axes) + 1, dtype=np.float64 + ) + np.testing.assert_allclose(result_matrix, expected_matrix) + + +def test_uniform_scale_factor(): + UniformScaleTransform(["x1", "y1"], ["x2", "y2"], 1.5) + UniformScaleTransform(["x1", "y1"], ["x3", "y3"], 1.5) + + +@pytest.mark.parametrize( + ("transform_a", "transform_b", "expected"), + [ + ( + 
IdentityTransform(["x2", "y2"], ["x3", "y3"]), + IdentityTransform(["x1", "y1"], ["x2", "y2"]), + IdentityTransform(["x1", "y1"], ["x3", "y3"]), + ), + ( + IdentityTransform(["x2", "y2"], ["x3", "y3"]), + UniformScaleTransform(["x1", "y1"], ["x2", "y2"], 1.5), + UniformScaleTransform(["x1", "y1"], ["x3", "y3"], 1.5), + ), + ( + IdentityTransform(["x2", "y2"], ["x3", "y3"]), + ScaleTransform( + ["x1", "y1"], ["x2", "y2"], np.array([1.5, 3.0], dtype=np.float64) + ), + ScaleTransform( + ["x1", "y1"], ["x3", "y3"], np.array([1.5, 3.0], dtype=np.float64) + ), + ), + ( + IdentityTransform(["x2", "y2"], ["x3", "y3"]), + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + np.array([[1.5, 3.0, 0.0], [-1.5, 3.0, 1.0]], dtype=np.float64), + ), + AffineTransform( + ["x1", "y1"], + ["x3", "y3"], + np.array([[1.5, 3.0, 0.0], [-1.5, 3.0, 1.0]], dtype=np.float64), + ), + ), + ( + UniformScaleTransform(["x2", "y2"], ["x3", "y3"], 1.5), + IdentityTransform(["x1", "y1"], ["x2", "y2"]), + UniformScaleTransform(["x1", "y1"], ["x3", "y3"], 1.5), + ), + ( + UniformScaleTransform(["x2", "y2"], ["x3", "y3"], 1.5), + UniformScaleTransform(["x1", "y1"], ["x2", "y2"], 3.0), + UniformScaleTransform(["x1", "y1"], ["x3", "y3"], 4.5), + ), + ( + UniformScaleTransform(["x2", "y2"], ["x3", "y3"], -0.5), + ScaleTransform(["x1", "y1"], ["x2", "y2"], [-3.0, 3.0]), + ScaleTransform(["x1", "y1"], ["x3", "y3"], [1.5, -1.5]), + ), + ( + UniformScaleTransform(["x2", "y2"], ["x3", "y3"], 0.5), + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + np.array([[1.5, 3.0, 0.0], [-1.5, 3.0, 1.0]], dtype=np.float64), + ), + AffineTransform( + ["x1", "y1"], + ["x3", "y3"], + np.array([[0.75, 1.5, 0.0], [-0.75, 1.5, 0.5]], dtype=np.float64), + ), + ), + ( + ScaleTransform( + ["x2", "y2"], ["x3", "y3"], np.array([1.5, 3.0], dtype=np.float64) + ), + IdentityTransform(["x1", "y1"], ["x2", "y2"]), + ScaleTransform( + ["x1", "y1"], ["x3", "y3"], np.array([1.5, 3.0], dtype=np.float64) + ), + ), + ( + ScaleTransform(["x2", 
"y2"], ["x3", "y3"], [1.0, -1.0]), + UniformScaleTransform(["x1", "y1"], ["x2", "y2"], 1.5), + ScaleTransform(["x1", "y1"], ["x3", "y3"], [1.5, -1.5]), + ), + ( + ScaleTransform(["x2", "y2"], ["x3", "y3"], [1.5, -1.0]), + ScaleTransform(["x1", "y1"], ["x2", "y2"], [2.0, 1.5]), + ScaleTransform(["x1", "y1"], ["x3", "y3"], [3.0, -1.5]), + ), + ( + ScaleTransform(["x2", "y2"], ["x3", "y3"], [0.5, -0.5]), + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + np.array([[1.5, 3.0, 0.0], [-1.5, 3.0, 1.0]], dtype=np.float64), + ), + AffineTransform( + ["x1", "y1"], + ["x3", "y3"], + np.array([[0.75, 1.5, 0.0], [0.75, -1.5, -0.5]], dtype=np.float64), + ), + ), + ( + AffineTransform( + ["x2", "y2"], + ["x3", "y3"], + np.array([[1.5, 3.0, 0.0], [-1.5, 3.0, 1.0]], dtype=np.float64), + ), + IdentityTransform(["x1", "y1"], ["x2", "y2"]), + AffineTransform( + ["x1", "y1"], + ["x3", "y3"], + np.array([[1.5, 3.0, 0.0], [-1.5, 3.0, 1.0]], dtype=np.float64), + ), + ), + ( + AffineTransform( + ["x2", "y2"], + ["x3", "y3"], + np.array([[1.5, 3.0, 0.0], [-1.5, 3.0, 1.0]], dtype=np.float64), + ), + UniformScaleTransform(["x1", "y1"], ["x2", "y2"], 2.0), + AffineTransform( + ["x1", "y1"], + ["x3", "y3"], + np.array([[3.0, 6.0, 0.0], [-3.0, 6.0, 1.0]], dtype=np.float64), + ), + ), + ( + AffineTransform( + ["x2", "y2"], + ["x3", "y3"], + np.array([[1.5, 3.0, 0.0], [-1.5, 3.0, 1.0]], dtype=np.float64), + ), + ScaleTransform(["x1", "y1"], ["x2", "y2"], [2.0, -2.0]), + AffineTransform( + ["x1", "y1"], + ["x3", "y3"], + np.array([[3.0, -6.0, 0.0], [-3.0, -6.0, 1.0]], dtype=np.float64), + ), + ), + ( + AffineTransform( + ["x2", "y2"], + ["x3", "y3"], + np.array([[2.0, 0.0, 1.0], [0.0, 4.0, 1.0]], dtype=np.float64), + ), + AffineTransform( + ["x1", "y1"], + ["x2", "y2"], + np.array([[1.0, 1.0, -1.0], [0.0, 1.0, 2.0]], dtype=np.float64), + ), + AffineTransform( + ["x1", "y1"], + ["x3", "y3"], + np.array([[2.0, 2.0, -1.0], [0.0, 4.0, 9.0]], dtype=np.float64), + ), + ), + ], + ids=lambda val: 
type(val).__name__, +) +def test_multiply_tranform( + transform_a, + transform_b, + expected: CoordinateTransform, +): + result = transform_a @ transform_b + check_transform_is_equal(result, expected) From 948754c63b7989474138dbf6ce0209daa1734948 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Mon, 23 Sep 2024 15:59:04 -0400 Subject: [PATCH 02/17] CI version bumps, deps factor (#225) * bump GHA `checkout`, `setup-python` versions * remove dupes from `requirements-py3.8-lint.txt` * rm unnecessary lint deps --- .github/workflows/python-somacore.yaml | 14 ++++----- python-spec/requirements-py3.8-lint.txt | 39 ++----------------------- 2 files changed, 9 insertions(+), 44 deletions(-) diff --git a/.github/workflows/python-somacore.yaml b/.github/workflows/python-somacore.yaml index f27d6c70..b6611425 100644 --- a/.github/workflows/python-somacore.yaml +++ b/.github/workflows/python-somacore.yaml @@ -20,11 +20,11 @@ jobs: with: python-version: ${{ env.PYTHON_VERSION }} cache: pip - cache-dependency-path: python-spec/requirements-py${{ env.PYTHON_VERSION }}-lint.txt + cache-dependency-path: "python-spec/requirements-py${{ env.PYTHON_VERSION }}*.txt" - name: Install static analysis packages - run: | - pip install -r python-spec/requirements-py${{ env.PYTHON_VERSION }}-lint.txt + working-directory: python-spec + run: pip install -r requirements-py${PYTHON_VERSION}.txt -r requirements-py${PYTHON_VERSION}-lint.txt - name: Restore pre-commit cache uses: actions/cache@v3 @@ -47,12 +47,12 @@ jobs: matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: # setuptools-scm needs a deep clone so it can look through history # to find a relevant tag. 
fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: pip @@ -78,8 +78,8 @@ jobs: && startsWith(github.event.release.tag_name, 'python-') runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: cache: pip cache-dependency-path: python-spec/requirements-py3.10.txt diff --git a/python-spec/requirements-py3.8-lint.txt b/python-spec/requirements-py3.8-lint.txt index 62af1603..5b1ec208 100644 --- a/python-spec/requirements-py3.8-lint.txt +++ b/python-spec/requirements-py3.8-lint.txt @@ -1,38 +1,3 @@ -anndata==0.9.2 -attrs==23.2.0 -black==24.4.2 -cfgv==3.4.0 -click==8.1.7 -distlib==0.3.8 -filelock==3.15.1 -h5py==3.11.0 -identify==2.5.36 -importlib_metadata==7.1.0 -isort==5.13.2 -llvmlite==0.41.1 -mypy==1.10.0 -mypy-extensions==1.0.0 -natsort==8.4.0 -nodeenv==1.9.1 -numba==0.58.1 -numpy==1.24.4 -packaging==24.1 -pandas==2.0.3 -pandas-stubs==2.0.3.230814 -pathspec==0.12.1 -platformdirs==4.2.2 +mypy==1.11.2 +pandas-stubs==2.0.3.230814 # last version that supports Python 3.8 pre-commit==3.5.0 -pyarrow==16.1.0 -pyarrow-hotfix==0.6 -python-dateutil==2.9.0.post0 -pytz==2024.1 -PyYAML==6.0.1 -ruff==0.4.9 -scipy==1.10.1 -six==1.16.0 -tomli==2.0.1 -types-pytz==2024.1.0.20240417 -typing_extensions==4.12.2 -tzdata==2024.1 -virtualenv==20.26.2 -zipp==3.19.2 From 18e9b06ac397e1a807f3700b7ae5f55eb6b31ca1 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Mon, 23 Sep 2024 16:00:51 -0400 Subject: [PATCH 03/17] comment nits (#226) --- python-spec/src/somacore/coordinates.py | 5 ++--- python-spec/src/somacore/types.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/python-spec/src/somacore/coordinates.py b/python-spec/src/somacore/coordinates.py index 986ed83e..727dac3d 100644 --- a/python-spec/src/somacore/coordinates.py +++ b/python-spec/src/somacore/coordinates.py @@ -26,9 
+26,7 @@ class Axis: @attrs.define(frozen=True) -class CoordinateSpace( - collections.abc.Sequence -): # Change to Sequence[Axis] after 3.8 is dropped. +class CoordinateSpace(collections.abc.Sequence): """A coordinate space for spatial data. Args: @@ -37,6 +35,7 @@ class CoordinateSpace( Lifecycle: experimental """ + # Change to Sequence[Axis] after 3.8 is dropped. axes: Tuple[Axis, ...] = attrs.field(converter=tuple) @axes.validator diff --git a/python-spec/src/somacore/types.py b/python-spec/src/somacore/types.py index 4902b8ed..45b9a12c 100644 --- a/python-spec/src/somacore/types.py +++ b/python-spec/src/somacore/types.py @@ -57,7 +57,7 @@ def str_or_seq_length(obj: Union[str, Sequence[str]]) -> int: class Slice(Protocol[_T_co]): """A slice which stores a certain type of object. - This protocol describes the built in ``slice`` type, with a hint to callers + This protocol describes the built-in ``slice`` type, with a hint to callers about what type they should put *inside* the slice. It is for type annotations only and is not runtime-checkable (i.e., you can't do ``isinstance(thing, Slice)``), because ``range`` objects also have @@ -86,7 +86,7 @@ def stop(self) -> Optional[_T_co]: ... def step(self) -> Optional[_T_co]: ... if sys.version_info < (3, 10) and not TYPE_CHECKING: - # Python 3.9 and below have a bug where any Protocol with an @property + # Python 3.9 and below have a bug where any Protocol with a @property # was always regarded as runtime-checkable. 
@classmethod def __subclasscheck__(cls, __subclass: type) -> NoReturn: From 63c7ba8d00f21970e2970e6f4a7e8c63c36d9685 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Mon, 23 Sep 2024 16:04:54 -0400 Subject: [PATCH 04/17] drop Python 3.8 support (#224) --- .github/workflows/python-somacore.yaml | 4 ++-- pyproject.toml | 6 +++--- python-spec/requirements-py3.8-lint.txt | 3 --- python-spec/requirements-py3.8.txt | 19 ------------------- python-spec/requirements-py3.9-lint.txt | 3 +++ python-spec/src/somacore/coordinates.py | 3 +-- python-spec/update-requirements-txt | 4 ++-- 7 files changed, 11 insertions(+), 31 deletions(-) delete mode 100644 python-spec/requirements-py3.8-lint.txt delete mode 100644 python-spec/requirements-py3.8.txt create mode 100644 python-spec/requirements-py3.9-lint.txt diff --git a/.github/workflows/python-somacore.yaml b/.github/workflows/python-somacore.yaml index b6611425..a23d6773 100644 --- a/.github/workflows/python-somacore.yaml +++ b/.github/workflows/python-somacore.yaml @@ -12,7 +12,7 @@ jobs: format-check: runs-on: ubuntu-latest env: - PYTHON_VERSION: "3.8" + PYTHON_VERSION: "3.9" steps: - uses: actions/checkout@v3 @@ -45,7 +45,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 with: diff --git a/pyproject.toml b/pyproject.toml index 1cf68cc6..63c415fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "scipy", "typing-extensions>=4.1", # For LiteralString (py3.11) ] -requires-python = "~=3.8" +requires-python = ">=3.9" urls = { repository = "https://github.com/single-cell-data/SOMA.git" } classifiers = ["License :: OSI Approved :: MIT License"] @@ -43,7 +43,7 @@ tag_regex = '^python-(?P[vV]?\d+(?:\.\d+){0,2}[^\+]*)(?:\+.*)?$' [tool.ruff] lint.extend-select = ["I"] -target-version = "py38" +target-version = "py39" [tool.ruff.lint.isort] force-single-line = true @@ 
-54,7 +54,7 @@ single-line-exclusions = ["typing", "typing_extensions"] check_untyped_defs = true enable_error_code = ["ignore-without-code"] warn_redundant_casts = true -python_version = 3.8 +python_version = 3.9 # We want to enable this but it won't work when running locally due to the # presence of _version.py (which invalidates the ignore, which causes an error). # diff --git a/python-spec/requirements-py3.8-lint.txt b/python-spec/requirements-py3.8-lint.txt deleted file mode 100644 index 5b1ec208..00000000 --- a/python-spec/requirements-py3.8-lint.txt +++ /dev/null @@ -1,3 +0,0 @@ -mypy==1.11.2 -pandas-stubs==2.0.3.230814 # last version that supports Python 3.8 -pre-commit==3.5.0 diff --git a/python-spec/requirements-py3.8.txt b/python-spec/requirements-py3.8.txt deleted file mode 100644 index 7da2b9f5..00000000 --- a/python-spec/requirements-py3.8.txt +++ /dev/null @@ -1,19 +0,0 @@ -anndata==0.9.2 -attrs==23.2.0 -h5py==3.11.0 -importlib_metadata==7.1.0 -llvmlite==0.41.1 -natsort==8.4.0 -numba==0.58.1 -numpy==1.24.4 -packaging==24.1 -pandas==2.0.3 -pyarrow==16.1.0 -pyarrow-hotfix==0.6 -python-dateutil==2.9.0.post0 -pytz==2024.1 -scipy==1.10.1 -six==1.16.0 -typing_extensions==4.12.2 -tzdata==2024.1 -zipp==3.19.2 diff --git a/python-spec/requirements-py3.9-lint.txt b/python-spec/requirements-py3.9-lint.txt new file mode 100644 index 00000000..aef1a068 --- /dev/null +++ b/python-spec/requirements-py3.9-lint.txt @@ -0,0 +1,3 @@ +mypy==1.11.2 +pandas-stubs==2.2.2.240807 # last version which supports Python 3.9 +pre-commit==3.8.0 diff --git a/python-spec/src/somacore/coordinates.py b/python-spec/src/somacore/coordinates.py index 727dac3d..83555e88 100644 --- a/python-spec/src/somacore/coordinates.py +++ b/python-spec/src/somacore/coordinates.py @@ -35,8 +35,7 @@ class CoordinateSpace(collections.abc.Sequence): Lifecycle: experimental """ - # Change to Sequence[Axis] after 3.8 is dropped. - axes: Tuple[Axis, ...] 
= attrs.field(converter=tuple) + axes: Sequence[Axis] = attrs.field(converter=tuple) @axes.validator def _validate(self, _, axes: Tuple[Axis, ...]) -> None: diff --git a/python-spec/update-requirements-txt b/python-spec/update-requirements-txt index 172bee02..42f9b88a 100755 --- a/python-spec/update-requirements-txt +++ b/python-spec/update-requirements-txt @@ -10,9 +10,9 @@ TEMPDIR="$(mktemp -d)" trap "trash $TEMPDIR" EXIT # The version of Python we want to run lints under. -LINTVER=3.8 +LINTVER=3.9 -for PYVER in 3.8 3.9 3.10 3.11 3.12; do +for PYVER in 3.9 3.10 3.11 3.12; do CONDIR="$TEMPDIR/py-$PYVER" conda create -y -p "$CONDIR" "python=$PYVER" ( From e71c15df4fb5522b13a55d2317d785438a9b4904 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:39:08 -0400 Subject: [PATCH 05/17] Update CoordinateSpace type hints (#227) --- python-spec/src/somacore/coordinates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-spec/src/somacore/coordinates.py b/python-spec/src/somacore/coordinates.py index 83555e88..3f09ef6c 100644 --- a/python-spec/src/somacore/coordinates.py +++ b/python-spec/src/somacore/coordinates.py @@ -26,7 +26,7 @@ class Axis: @attrs.define(frozen=True) -class CoordinateSpace(collections.abc.Sequence): +class CoordinateSpace(collections.abc.Sequence[Axis]): """A coordinate space for spatial data. 
Args: @@ -35,7 +35,7 @@ class CoordinateSpace(collections.abc.Sequence): Lifecycle: experimental """ - axes: Sequence[Axis] = attrs.field(converter=tuple) + axes: Tuple[Axis] = attrs.field(converter=tuple) @axes.validator def _validate(self, _, axes: Tuple[Axis, ...]) -> None: From eb60084d979245d89c34f25960bd1d60c6306df2 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Tue, 24 Sep 2024 15:14:46 -0400 Subject: [PATCH 06/17] Fix type hint for CoordinateSpace `axis` (#228) --- python-spec/src/somacore/coordinates.py | 2 +- python-spec/testing/test_coordinates.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/python-spec/src/somacore/coordinates.py b/python-spec/src/somacore/coordinates.py index 3f09ef6c..2703cb24 100644 --- a/python-spec/src/somacore/coordinates.py +++ b/python-spec/src/somacore/coordinates.py @@ -35,7 +35,7 @@ class CoordinateSpace(collections.abc.Sequence[Axis]): Lifecycle: experimental """ - axes: Tuple[Axis] = attrs.field(converter=tuple) + axes: Tuple[Axis, ...] 
= attrs.field(converter=tuple) @axes.validator def _validate(self, _, axes: Tuple[Axis, ...]) -> None: diff --git a/python-spec/testing/test_coordinates.py b/python-spec/testing/test_coordinates.py index 96e80cca..1bccb8a2 100644 --- a/python-spec/testing/test_coordinates.py +++ b/python-spec/testing/test_coordinates.py @@ -2,6 +2,8 @@ import pytest from somacore import AffineTransform +from somacore import Axis +from somacore import CoordinateSpace from somacore import CoordinateTransform from somacore import IdentityTransform from somacore import ScaleTransform @@ -28,6 +30,15 @@ def check_transform_is_equal( assert False +def test_coordinate_space(): + coord_space = CoordinateSpace( + (Axis("x", unit="nanometer"), Axis("y", unit="nanometer")) # type: ignore[arg-type] + ) + assert len(coord_space) == 2 + assert coord_space.axis_names == ("x", "y") + assert coord_space[0] == Axis("x", unit="nanometer") + + @pytest.mark.parametrize( ("input", "expected"), [ From d41bbb1af637013dd97d0954fbf315780e58d017 Mon Sep 17 00:00:00 2001 From: Aaron Wolen Date: Wed, 25 Sep 2024 16:14:50 -0500 Subject: [PATCH 07/17] Read obsm/varm arrays from _axism_inner (#229) --- python-spec/src/somacore/query/query.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python-spec/src/somacore/query/query.py b/python-spec/src/somacore/query/query.py index 3823670a..ac22792b 100644 --- a/python-spec/src/somacore/query/query.py +++ b/python-spec/src/somacore/query/query.py @@ -592,8 +592,7 @@ def _axism_inner_ndarray( axis: "_Axis", layer: str, ) -> np.ndarray: - axism = axis.getitem_from(self._ms, suf="m") - table = axism[layer].read().tables().concat() + table = self._axism_inner(axis, layer).tables().concat() n_row = len(axis.getattr_from(self._joinids)) n_col = len(table["soma_dim_1"].unique()) From 3d9e34fe2c933ca560ec7eea19aecda5efe6ec54 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Thu, 26 Sep 2024 12:28:39 -0400 Subject: 
[PATCH 08/17] Add new spatial foundational types (#219) New classes: * PointCloud: A spatial data frame for point data * GeometryDataFrame: A spatial data for polygon data * MultiscaleImage: An image class that can contain multiple image levels. * ImageProperties: A protocol with the required properties for image levels. * SpatialRead: New dataclass for returning read data with coordinate space information. Co-authored-by: nguyenv Co-authored-by: Aaron Wolen --- pyproject.toml | 3 +- python-spec/requirements-py3.10.txt | 1 + python-spec/requirements-py3.11.txt | 1 + python-spec/requirements-py3.12.txt | 1 + python-spec/requirements-py3.9.txt | 1 + python-spec/src/somacore/__init__.py | 11 + python-spec/src/somacore/options.py | 10 + python-spec/src/somacore/spatial.py | 782 +++++++++++++++++++++++++++ 8 files changed, 809 insertions(+), 1 deletion(-) create mode 100644 python-spec/src/somacore/spatial.py diff --git a/pyproject.toml b/pyproject.toml index 63c415fa..82a93840 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ # Remove this once we can specify a recent pyarrow. "pyarrow-hotfix", "scipy", + "shapely", "typing-extensions>=4.1", # For LiteralString (py3.11) ] requires-python = ">=3.9" @@ -62,5 +63,5 @@ python_version = 3.9 [[tool.mypy.overrides]] # These dependencies do not currently have canonical type stubs. 
-module = ["anndata", "pyarrow", "pyarrow_hotfix", "scipy"] +module = ["anndata", "pyarrow", "pyarrow.compute", "pyarrow_hotfix", "scipy", "shapely"] ignore_missing_imports = true diff --git a/python-spec/requirements-py3.10.txt b/python-spec/requirements-py3.10.txt index 068bd7e4..c2fd0fe3 100644 --- a/python-spec/requirements-py3.10.txt +++ b/python-spec/requirements-py3.10.txt @@ -14,6 +14,7 @@ pyarrow-hotfix==0.6 python-dateutil==2.9.0.post0 pytz==2024.1 scipy==1.13.1 +shapely==2.0.4 six==1.16.0 typing_extensions==4.12.2 tzdata==2024.1 diff --git a/python-spec/requirements-py3.11.txt b/python-spec/requirements-py3.11.txt index e7bdc654..665528ff 100644 --- a/python-spec/requirements-py3.11.txt +++ b/python-spec/requirements-py3.11.txt @@ -12,6 +12,7 @@ pyarrow==16.1.0 pyarrow-hotfix==0.6 python-dateutil==2.9.0.post0 pytz==2024.1 +shapely==2.0.4 scipy==1.13.1 six==1.16.0 typing_extensions==4.12.2 diff --git a/python-spec/requirements-py3.12.txt b/python-spec/requirements-py3.12.txt index 5024ca05..0fe2f050 100644 --- a/python-spec/requirements-py3.12.txt +++ b/python-spec/requirements-py3.12.txt @@ -14,6 +14,7 @@ python-dateutil==2.9.0.post0 pytz==2024.1 scipy==1.13.1 setuptools==70.0.0 +shapely==2.0.4 six==1.16.0 typing_extensions==4.12.2 tzdata==2024.1 diff --git a/python-spec/requirements-py3.9.txt b/python-spec/requirements-py3.9.txt index 9bd3fe44..db2c51ae 100644 --- a/python-spec/requirements-py3.9.txt +++ b/python-spec/requirements-py3.9.txt @@ -18,6 +18,7 @@ pytz==2024.1 rsa==4.7.2 s3transfer==0.6.0 scipy==1.13.1 +shapely==2.0.4 six==1.16.0 typing_extensions==4.12.2 tzdata==2024.1 diff --git a/python-spec/src/somacore/__init__.py b/python-spec/src/somacore/__init__.py index 9946871d..4cf00dab 100644 --- a/python-spec/src/somacore/__init__.py +++ b/python-spec/src/somacore/__init__.py @@ -37,6 +37,11 @@ from .query import AxisColumnNames from .query import AxisQuery from .query import ExperimentAxisQuery +from .spatial import GeometryDataFrame +from 
.spatial import ImageProperties +from .spatial import MultiscaleImage +from .spatial import PointCloud +from .spatial import SpatialRead from .types import ContextBase try: @@ -59,8 +64,14 @@ "ReadIter", "SparseNDArray", "SparseRead", + "SpatialRead", "Experiment", "Measurement", + "ImageProperties", + "MultiscaleImage", + "SpatialDataFrame", + "GeometryDataFrame", + "PointCloud", "BatchSize", "IOfN", "ResultOrder", diff --git a/python-spec/src/somacore/options.py b/python-spec/src/somacore/options.py index 6a6e629f..17faadb6 100644 --- a/python-spec/src/somacore/options.py +++ b/python-spec/src/somacore/options.py @@ -11,6 +11,7 @@ import numpy as np import numpy.typing as npt import pyarrow as pa +import shapely from typing_extensions import Final, Literal from . import types @@ -18,6 +19,9 @@ SOMA_JOINID: Final = "soma_joinid" """Global constant for the SOMA join ID.""" +SOMA_GEOMETRY: Final = "soma_geometry" +"""Global constant for SOMA spatial geometry type.""" + OpenMode = Literal["r", "w"] """How to open a SOMA object: read or write.""" @@ -177,5 +181,11 @@ class ResultOrder(enum.Enum): pa.ChunkedArray, ] """A single coordinate range for one dimension of a sparse ndarray.""" + SparseNDCoords = Sequence[SparseNDCoord] """A sequence of coordinate ranges for reading sparse ndarrays.""" + +SpatialRegion = Union[ + Sequence[int], Sequence[float], shapely.geometry.base.BaseGeometry +] +"""A spatial region used for reading spatial dataframes and multiscale images.""" diff --git a/python-spec/src/somacore/spatial.py b/python-spec/src/somacore/spatial.py new file mode 100644 index 00000000..3d739840 --- /dev/null +++ b/python-spec/src/somacore/spatial.py @@ -0,0 +1,782 @@ +"""Implementation of the SOMA image collection for spatial data""" + +import abc +from dataclasses import dataclass +from typing import ( + Any, + Generic, + MutableMapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) + +import pyarrow as pa +from typing_extensions import Final, 
Protocol, Self + +from . import base +from . import coordinates +from . import data +from . import options + +_DenseND = TypeVar("_DenseND", bound=data.DenseNDArray) +"""A particular implementation of a collection of DenseNDArrays.""" +_RootSO = TypeVar("_RootSO", bound=base.SOMAObject) +"""The root SomaObject type of the implementation.""" + +_RO_AUTO = options.ResultOrder.AUTO +# +# Read types +# + +_ReadData = TypeVar("_ReadData") + + +class PointCloud(base.SOMAObject, metaclass=abc.ABCMeta): + """A specialized SOMA DataFrame for storing collections of points in multi-dimensional space. + + The ``PointCloud`` class is designed to efficiently store and query point data, where each + point is represented by coordinates in one or more spatial dimensions (e.g., x, y, z) and + may have additional columns for associated attributes. + + Lifecycle: experimental + """ + + __slots__ = () + soma_type: Final = "SOMAPointCloud" # type: ignore[misc] + + @classmethod + @abc.abstractmethod + def create( + cls, + uri: str, + *, + schema: pa.Schema, + index_column_names: Sequence[str] = (options.SOMA_JOINID, "x", "y"), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + context: Optional[Any] = None, + ) -> Self: + """Creates a new ``PointCloud`` at the given URI. + + The schema of the created point cloud will include a column named + ``soma_joinid`` of type ``pyarrow.int64``, with negative values disallowed, and + at least one axis with numeric type. If a ``soma_joinid`` column is + present in the provided schema, it must be of the correct type. If the + ``soma_joinid`` column is not provided, one will be added. The ``soma_joinid`` + may be an index column. The axis columns must be index columns. + + Args: + uri: The URI where the dataframe will be created. + schema: Arrow schema defining the per-column schema. 
This schema + must define all columns, including columns to be named as index + columns. If the schema includes types unsupported by the SOMA + implementation, an error will be raised. + index_column_names: A list of column names to use as user-defined index + columns (e.g., ``['x', 'y']``). All named columns must exist in the + schema, and at least one index column name is required. + axis_names: An ordered list of axis column names that correspond to the + names of axes of the coordinate space the points are defined on. + Must be the name of index columns. + domain: An optional sequence of tuples specifying the domain of each + index column. Each tuple should be a pair consisting of the minimum + and maximum values storable in the index column. If omitted entirely, + or if ``None`` in a given dimension, the corresponding index-column + domain will use the minimum and maximum possible values for the + column's datatype. This makes a point cloud dataframe growable. + + Returns: + The newly created point cloud, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + # Data operations + + @abc.abstractmethod + def read( + self, + coords: options.SparseDFCoords = (), + column_names: Optional[Sequence[str]] = None, + *, + batch_size: options.BatchSize = options.BatchSize(), + partitions: Optional[options.ReadPartitions] = None, + result_order: options.ResultOrderStr = _RO_AUTO, + value_filter: Optional[str] = None, + platform_config: Optional[options.PlatformConfig] = None, + ) -> data.ReadIter[pa.Table]: + """Reads a user-defined slice of data into Arrow tables. + + Args: + coords: for each index dimension, which rows to read. + Defaults to ``()``, meaning no constraint -- all IDs. + column_names: the named columns to read and return. + Defaults to ``None``, meaning no constraint -- all column names. + partitions: If present, specifies that this is part of + a partitioned read, and which part of the data to include.
+ result_order: the order to return results, specified as a + :class:`~options.ResultOrder` or its string value. + value_filter: an optional value filter to apply to the results. + The default of ``None`` represents no filter. Value filter + syntax is implementation-defined; see the documentation + for the particular SOMA implementation for details. + Returns: + A :class:`ReadIter` of :class:`pa.Table`s. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def read_spatial_region( + self, + region: Optional[options.SpatialRegion] = None, + column_names: Optional[Sequence[str]] = None, + *, + region_transform: Optional[coordinates.CoordinateTransform] = None, + region_coord_space: Optional[coordinates.CoordinateSpace] = None, + batch_size: options.BatchSize = options.BatchSize(), + partitions: Optional[options.ReadPartitions] = None, + result_order: options.ResultOrderStr = _RO_AUTO, + value_filter: Optional[str] = None, + platform_config: Optional[options.PlatformConfig] = None, + ) -> "SpatialRead[data.ReadIter[pa.Table]]": + """Reads data intersecting a user-defined region of space into a + :class:`SpatialRead` with data in Arrow tables. + + + Args: + region: The region to query. May be a box in the form + [x_min, y_min, x_max, y_max] (for 2D images), a box in the form + [x_min, y_min, z_min, x_max, y_max, z_max] (for 3D images), or + a shapely Geometry. + column_names: The named columns to read and return. + Defaults to ``None``, meaning no constraint -- all column names. + region_transform: An optional coordinate transform from the read region to the + coordinate system of the spatial dataframe. + Defaults to ``None``, meaning an identity transform. + region_coord_space: An optional coordinate space for the region being read. + Defaults to ``None``, coordinate space will be inferred from transform. + batch_size: The size of batched reads. + Defaults to `unbatched`.
+ partitions: If present, specifies that this is part of a partitioned read, + and which part of the data to include. + result_order: the order to return results, specified as a + :class:`~options.ResultOrder` or its string value. + value_filter: an optional value filter to apply to the results. + The default of ``None`` represents no filter. Value filter + syntax is implementation-defined; see the documentation + for the particular SOMA implementation for details. + + Returns: + A :class:`SpatialRead` with :class:`ReadIter` of :class:`pa.Table`s data. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def write( + self, + values: Union[pa.RecordBatch, pa.Table], + *, + platform_config: Optional[options.PlatformConfig] = None, + ) -> Self: + """Writes the data from an Arrow table to the persistent object. + + As duplicate index values are not allowed, index values already present + in the object are overwritten and new index values are added. + + Args: + values: An Arrow table containing all columns, including + the index columns. The schema for the values must match + the schema for the ``DataFrame``. + + Returns: ``self``, to enable method chaining. + + Lifecycle: experimental + """ + raise NotImplementedError() + + # Metadata operations + + @property + @abc.abstractmethod + def schema(self) -> pa.Schema: + """The schema of the data in this dataframe. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def index_column_names(self) -> Tuple[str, ...]: + """The names of the index (dimension) columns. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def coordinate_space(self) -> Optional[coordinates.CoordinateSpace]: + """Coordinate space for this point cloud. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @coordinate_space.setter + @abc.abstractmethod + def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: + """Coordinate space for this point cloud. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def axis_names(self) -> Tuple[str, ...]: + """The names of the axes of the coordinate space the data is defined on. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def domain(self) -> Tuple[Tuple[Any, Any], ...]: + """The allowable range of values in each index column. + + Returns: a tuple of minimum and maximum values, inclusive, + storable on each index column of the dataframe. + + Lifecycle: experimental + """ + raise NotImplementedError() + + +class GeometryDataFrame(base.SOMAObject, metaclass=abc.ABCMeta): + """A specialized SOMA object for storing complex geometries with spatial indexing. + + The ``GeometryDataFrame`` class is designed to store and manage geometric shapes such as + polygons, lines, and multipoints, along with additional columns for associated attributes. + + Lifecycle: experimental + """ + + __slots__ = () + soma_type: Final = "SOMAGeometryDataFrame" # type: ignore[misc] + + # Lifecycle + + @classmethod + @abc.abstractmethod + def create( + cls, + uri: str, + *, + schema: pa.Schema, + index_column_names: Sequence[str] = ( + options.SOMA_JOINID, + options.SOMA_GEOMETRY, + ), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + context: Optional[Any] = None, + ) -> Self: + """Creates a new ``GeometryDataFrame`` at the given URI. 
+ + The schema of the created geometry dataframe will include a column named + ``soma_joinid`` of type ``pyarrow.int64``, with negative values + disallowed, and a column named ``soma_geometry`` of type ``pyarrow.binary`` or + ``pyarrow.large_binary``. If a ``soma_joinid`` column or ``soma_geometry`` + are present in the provided schema, they must be of the correct type. If + either the ``soma_joinid`` column or ``soma_geometry`` column are not provided, + one will be added. The ``soma_joinid`` may be an index column. The + ``soma_geometry`` column must be an index column. + + Args: + uri: The URI where the dataframe will be created. + schema: Arrow schema defining the per-column schema. This schema + must define all columns, including columns to be named as index + columns. If the schema includes types unsupported by the SOMA + implementation, an error will be raised. + index_column_names: A list of column names to use as user-defined + index columns (e.g., ``['cell_type', 'tissue_type']``). + All named columns must exist in the schema, and at least one + index column name is required. + axis_names: An ordered list of axis column names that correspond to the + names of the axes of the coordinate space the geometries are defined + on. + domain: An optional sequence of tuples specifying the domain of each + index column. Two tuples must be provided for the ``soma_geometry`` + column which store the width followed by the height. Each tuple should + be a pair consisting of the minimum and maximum values storable in the + index column. If omitted entirely, or if ``None`` in a given dimension, + the corresponding index-column domain will use the minimum and maximum + possible values for the column's datatype. This makes a dataframe + growable. + + Returns: + The newly created geometry dataframe, opened for writing.
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + # Data operations + + @abc.abstractmethod + def read( + self, + coords: options.SparseDFCoords = (), + column_names: Optional[Sequence[str]] = None, + *, + batch_size: options.BatchSize = options.BatchSize(), + partitions: Optional[options.ReadPartitions] = None, + result_order: options.ResultOrderStr = _RO_AUTO, + value_filter: Optional[str] = None, + platform_config: Optional[options.PlatformConfig] = None, + ) -> data.ReadIter[pa.Table]: + """Reads a user-defined slice of data into Arrow tables. + + Args: + coords: for each index dimension, which rows to read. + Defaults to ``()``, meaning no constraint -- all IDs. + column_names: the named columns to read and return. + Defaults to ``None``, meaning no constraint -- all column names. + partitions: If present, specifies that this is part of + a partitioned read, and which part of the data to include. + result_order: the order to return results, specified as a + :class:`~options.ResultOrder` or its string value. + value_filter: an optional value filter to apply to the results. + The default of ``None`` represents no filter. Value filter + syntax is implementation-defined; see the documentation + for the particular SOMA implementation for details. + Returns: + A :class:`ReadIter` of :class:`pa.Table`s. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def read_spatial_region( + self, + region: Optional[options.SpatialRegion] = None, + column_names: Optional[Sequence[str]] = None, + *, + region_transform: Optional[coordinates.CoordinateTransform] = None, + region_coord_space: Optional[coordinates.CoordinateSpace] = None, + batch_size: options.BatchSize = options.BatchSize(), + partitions: Optional[options.ReadPartitions] = None, + result_order: options.ResultOrderStr = _RO_AUTO, + value_filter: Optional[str] = None, + platform_config: Optional[options.PlatformConfig] = None, + ) -> "SpatialRead[data.ReadIter[pa.Table]]": + """Reads data intersecting a user-defined region of space into a + :class:`SpatialRead` with data in Arrow tables. + + + Args: + region: The region to query. May be a box in the form + [x_min, y_min, x_max, y_max] (for 2D images), a box in the form + [x_min, y_min, z_min, x_max, y_max, z_max] (for 3D images), or + a shapely Geometry. + column_names: The named columns to read and return. + Defaults to ``None``, meaning no constraint -- all column names. + region_transform: An optional coordinate transform from the read region to the + coordinate system of the spatial dataframe. + Defaults to ``None``, meaning an identity transform. + region_coord_space: An optional coordinate space for the region being read. + Defaults to ``None``, coordinate space will be inferred from transform. + batch_size: The size of batched reads. + Defaults to `unbatched`. + partitions: If present, specifies that this is part of a partitioned read, + and which part of the data to include. + result_order: the order to return results, specified as a + :class:`~options.ResultOrder` or its string value. + value_filter: an optional value filter to apply to the results. + The default of ``None`` represents no filter.
Value filter + syntax is implementation-defined; see the documentation + for the particular SOMA implementation for details. + + Returns: + A :class:`SpatialRead` with :class:`ReadIter` of :class:`pa.Table`s data. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def write( + self, + values: Union[pa.RecordBatch, pa.Table], + *, + platform_config: Optional[options.PlatformConfig] = None, + ) -> Self: + """Writes the data from an Arrow table to the persistent object. + + As duplicate index values are not allowed, index values already present + in the object are overwritten and new index values are added. + + Args: + values: An Arrow table containing all columns, including + the index columns. The schema for the values must match + the schema for the ``DataFrame``. + + Returns: ``self``, to enable method chaining. + + Lifecycle: experimental + """ + raise NotImplementedError() + + # Metadata operations + + @property + @abc.abstractmethod + def schema(self) -> pa.Schema: + """The schema of the data in this dataframe. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def index_column_names(self) -> Tuple[str, ...]: + """The names of the index (dimension) columns. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def axis_names(self) -> Tuple[str, ...]: + """The names of the axes of the coordinate space the data is defined on. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def coordinate_space(self) -> Optional[coordinates.CoordinateSpace]: + """Coordinate space for this geometry dataframe. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @coordinate_space.setter + @abc.abstractmethod + def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: + """Coordinate space for this geometry dataframe. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def domain(self) -> Tuple[Tuple[Any, Any], ...]: + """The allowable range of values in each index column. + + Returns: a tuple of minimum and maximum values, inclusive, + storable on each index column of the dataframe. + + Lifecycle: experimental + """ + raise NotImplementedError() + + +class MultiscaleImage( # type: ignore[misc] # __eq__ false positive + base.SOMAObject, + Generic[_DenseND, _RootSO], + MutableMapping[str, _DenseND], + metaclass=abc.ABCMeta, +): + """A multiscale image with an extendable number of resolution levels. + + The multiscale image defines the top level properties. Each level must + match the expected following properties: + * number of channels + * axis order + + Lifecycle: experimental + """ + + # This class is implemented as a mixin to be used with SOMA classes. + # For example, a SOMA implementation would look like this: + # + # # This type-ignore comment will always be needed due to limitations + # # of type annotations; it is (currently) expected. + # class MultiscaleImage( # type: ignore[type-var] + # ImplBaseCollection[ImplSOMAObject], + # somacore.MultiscaleImage[ImplDenseNDArray, ImpSOMAObject], + # ): + # ... + + soma_type: Final = "SOMAMultiscaleImage" # type: ignore[misc] + __slots__ = () + + # Lifecycle + + @classmethod + @abc.abstractmethod + def create( + cls, + uri: str, + *, + type: pa.DataType, + reference_level_shape: Sequence[int], + axis_names: Sequence[str] = ("c", "y", "x"), + axis_types: Sequence[str] = ("channel", "height", "width"), + platform_config: Optional[options.PlatformConfig] = None, + context: Optional[Any] = None, + ) -> Self: + """Creates a new collection of this type at the given URI. + + Args: + uri: The URI where the collection will be created. + reference_level_shape: The shape of the reference level for the multiscale + image. 
In most cases, this corresponds to the size of the image + at ``level=0``. + axis_names: The names of the axes of the image. + axis_types: The types of the axes of the image. Must be the same length as + ``axis_names``. Valid types are: ``channel``, ``height``, ``width``, + and ``depth``. + + Returns: + The newly created collection, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def add_new_level( + self, + key: str, + *, + uri: Optional[str] = None, + shape: Sequence[int], + ) -> data.DenseNDArray: + """Add a new level in the multi-scale image. + + Parameters are as in :meth:`data.DenseNDArray.create`. The provided shape will + be used to compute the scale between images and must correspond to the image + size for the entire image. + + Lifecycle: experimental + """ + raise NotImplementedError() + + # Data operations + + @abc.abstractmethod + def read_spatial_region( + self, + level: Union[int, str], + region: options.SpatialRegion = (), + *, + channel_coords: options.DenseCoord = None, + region_transform: Optional[coordinates.CoordinateTransform] = None, + region_coord_space: Optional[coordinates.CoordinateSpace] = None, + result_order: options.ResultOrderStr = _RO_AUTO, + platform_config: Optional[options.PlatformConfig] = None, + ) -> "SpatialRead[pa.Tensor]": + """Reads a user-defined region of space into a :class:`SpatialRead` with data + in either an Arrow tensor or table. + + Reads the bounding box of the input region from the requested image level. This + will return a :class:`SpatialRead` with the image data stored as a + :class:`pa.Tensor`. + + Args: + level: The image level to read the data from. May use index of the level + or the image name. + region: The region to query. May be a box in the form + [x_min, y_min, x_max, y_max] (for 2D images), a box in the form + [x_min, y_min, z_min, x_max, y_max, z_max] (for 3D images), or + a shapely Geometry. 
+ channel_coords: An optional slice that defines the channel coordinates + to read. + region_transform: An optional coordinate transform that provides the + transformation from the provided region to the reference level of this + image. Defaults to ``None``. + region_coord_space: An optional coordinate space for the region being read. + The axis names must match the input axis names of the transform. + Defaults to ``None``, coordinate space will be inferred from transform. + result_order: the order to return results, specified as a + :class:`~options.ResultOrder` or its string value. + + Returns: + The data bounding the requested region as a :class:`SpatialRead` with + :class:`pa.Tensor` data. + """ + raise NotImplementedError() + + # Metadata operations + + @property + @abc.abstractmethod + def axis_names(self) -> Tuple[str, ...]: + """The name of the image axes. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def coordinate_space(self) -> Optional[coordinates.CoordinateSpace]: + """Coordinate space for this multiscale image. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @coordinate_space.setter + @abc.abstractmethod + def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: + """Coordinate space for this multiscale image. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_from_level( + self, level: Union[int, str] + ) -> coordinates.ScaleTransform: + """Returns the transformation from user requested level to image reference level. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_to_level( + self, level: Union[int, str] + ) -> coordinates.ScaleTransform: + """Returns the transformation from the image reference level to the user + requested level. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def image_type(self) -> str: + """The order of the axes as stored in the data model. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def level_count(self) -> int: + """The number of image levels stored in the MultiscaleImage. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def level_properties(self, level: Union[int, str]) -> "ImageProperties": + """The properties of an image at the specified level. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + def reference_level(self) -> Optional[int]: + """The index of image level that is used as a reference level. + + This will return ``None`` if no current image level matches the size of the + reference level. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def reference_level_properties(self) -> "ImageProperties": + """The image properties of the reference level. + + Lifecycle: experimental + """ + raise NotImplementedError() + + +class ImageProperties(Protocol): + """Class requirements for level properties of images. + + Lifecycle: experimental + """ + + @property + def name(self) -> str: + """The key for the image. + + Lifecycle: experimental + """ + + @property + def shape(self) -> Tuple[int, ...]: + """Size of each axis of the image. + + Lifecycle: experimental + """ + + +@dataclass +class SpatialRead(Generic[_ReadData]): + """Reader for spatial data. + + Args: + data: The data accessor. + data_coordinate_space: The coordinate space the read data is defined on. + output_coordinate_space: The requested output coordinate space. + coordinate_transform: A coordinate transform from the data coordinate space to + the desired output coordinate space. 
+ + Lifecycle: experimental + """ + + data: _ReadData + data_coordinate_space: coordinates.CoordinateSpace + output_coordinate_space: coordinates.CoordinateSpace + coordinate_transform: coordinates.CoordinateTransform + + def __post_init__(self): + if ( + self.data_coordinate_space.axis_names + != self.coordinate_transform.input_axes + ): + raise ValueError( + "Input coordinate transform axis names do not match the data coordinate " + "space." + ) + if ( + self.output_coordinate_space.axis_names + != self.coordinate_transform.output_axes + ): + raise ValueError( + "Output coordinate transform axis names do not match the output " + "coordinate space." + ) From b53cd2bced083d6b83f3d00bf446a7ca9f18f6ea Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Thu, 26 Sep 2024 13:56:38 -0400 Subject: [PATCH 09/17] Add the `Scene` class (#220) * Create the `Scene` class * Add `spatial` collection of scenes to the `Experiment` class * Add Scene to ephemeral collections and tests --------- Co-authored-by: nguyenv --- python-spec/src/somacore/__init__.py | 2 + .../src/somacore/ephemeral/__init__.py | 2 + .../src/somacore/ephemeral/collections.py | 160 ++++++- python-spec/src/somacore/experiment.py | 11 +- python-spec/src/somacore/scene.py | 444 ++++++++++++++++++ python-spec/testing/test_collection.py | 2 + 6 files changed, 618 insertions(+), 3 deletions(-) create mode 100644 python-spec/src/somacore/scene.py diff --git a/python-spec/src/somacore/__init__.py b/python-spec/src/somacore/__init__.py index 4cf00dab..f16b3f8f 100644 --- a/python-spec/src/somacore/__init__.py +++ b/python-spec/src/somacore/__init__.py @@ -37,6 +37,7 @@ from .query import AxisColumnNames from .query import AxisQuery from .query import ExperimentAxisQuery +from .scene import Scene from .spatial import GeometryDataFrame from .spatial import ImageProperties from .spatial import MultiscaleImage @@ -67,6 +68,7 @@ "SpatialRead", "Experiment", "Measurement", + "Scene", 
"ImageProperties", "MultiscaleImage", "SpatialDataFrame", diff --git a/python-spec/src/somacore/ephemeral/__init__.py b/python-spec/src/somacore/ephemeral/__init__.py index 6ec3fce2..932f63f1 100644 --- a/python-spec/src/somacore/ephemeral/__init__.py +++ b/python-spec/src/somacore/ephemeral/__init__.py @@ -8,9 +8,11 @@ from .collections import Collection from .collections import Experiment from .collections import Measurement +from .collections import Scene __all__ = ( "Collection", "Experiment", "Measurement", + "Scene", ) diff --git a/python-spec/src/somacore/ephemeral/collections.py b/python-spec/src/somacore/ephemeral/collections.py index de49cb05..583ed947 100644 --- a/python-spec/src/somacore/ephemeral/collections.py +++ b/python-spec/src/somacore/ephemeral/collections.py @@ -1,13 +1,27 @@ -from typing import Any, Dict, Iterator, NoReturn, Optional, TypeVar - +from typing import ( + Any, + Dict, + Iterator, + NoReturn, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) + +import pyarrow as pa from typing_extensions import Literal, Self from .. import base from .. import collection +from .. import coordinates from .. import data from .. import experiment from .. import measurement from .. import options +from .. import scene +from .. 
import spatial _Elem = TypeVar("_Elem", bound=base.SOMAObject) @@ -120,6 +134,14 @@ class Collection( # type: ignore[misc] # __eq__ false positive ] """The loosest possible constraint of the abstract Measurement type.""" +_BasicAbstractScene = scene.Scene[ + spatial.MultiscaleImage, + spatial.PointCloud, + spatial.GeometryDataFrame, + base.SOMAObject, +] +"""The loosest possible constraint of the abstract Scene type.""" + class Measurement( # type: ignore[misc] # __eq__ false positive BaseCollection[base.SOMAObject], _BasicAbstractMeasurement @@ -129,11 +151,145 @@ class Measurement( # type: ignore[misc] # __eq__ false positive __slots__ = () +class Scene( # type: ignore[misc] # __eq__ false positive + BaseCollection[base.SOMAObject], _BasicAbstractScene +): + """An in-memory Collection with Scene semantics.""" + + __slots__ = () + + @property + def coordinate_space(self) -> coordinates.CoordinateSpace: + """Coordinate system for this scene.""" + raise NotImplementedError() + + @coordinate_space.setter + def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: + raise NotImplementedError() + + def add_geometry_dataframe( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: str, + schema: pa.Schema, + index_column_names: Sequence[str] = ( + options.SOMA_JOINID, + options.SOMA_GEOMETRY, + ), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + context: Optional[Any] = None, + ) -> spatial.GeometryDataFrame: + raise NotImplementedError() + + def add_multiscale_image( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: str, + type: pa.DataType, + image_type: str = "CYX", # TODO: Replace this arg after PR #219 is merged + reference_level_shape: Sequence[int], + axis_names: Sequence[str] 
= ("c", "x", "y"), + ) -> spatial.MultiscaleImage: + raise NotImplementedError() + + def add_new_point_cloud( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: Optional[str] = None, + schema: pa.Schema, + index_column_names: Sequence[str] = (options.SOMA_JOINID,), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + ) -> spatial.PointCloud: + raise NotImplementedError() + + def set_transform_to_geometry_dataframe( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "obsl", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> spatial.GeometryDataFrame: + raise NotImplementedError() + + def set_transform_to_multiscale_image( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "img", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> spatial.MultiscaleImage: + raise NotImplementedError() + + def set_transform_to_point_cloud( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "obsl", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> spatial.PointCloud: + raise NotImplementedError() + + def get_transform_from_geometry_dataframe( + self, key: str, *, subcollection: Union[str, Sequence[str]] = "obsl" + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_from_multiscale_image( + self, + key: str, + *, + subcollection: str = "img", + level: Optional[Union[str, int]] = None, + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_from_point_cloud( + self, key: str, *, subcollection: str = "obsl" + ) -> coordinates.CoordinateTransform: + raise 
NotImplementedError() + + def get_transform_to_geometry_dataframe( + self, key: str, *, subcollection: Union[str, Sequence[str]] = "obsl" + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_to_multiscale_image( + self, + key: str, + *, + subcollection: str = "img", + level: Optional[Union[str, int]] = None, + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + def get_transform_to_point_cloud( + self, key: str, *, subcollection: str = "obsl" + ) -> coordinates.CoordinateTransform: + raise NotImplementedError() + + class Experiment( # type: ignore[misc] # __eq__ false positive BaseCollection[base.SOMAObject], experiment.Experiment[ data.DataFrame, collection.Collection[_BasicAbstractMeasurement], + collection.Collection[_BasicAbstractScene], base.SOMAObject, ], ): diff --git a/python-spec/src/somacore/experiment.py b/python-spec/src/somacore/experiment.py index 6f97311f..bb0d7c65 100644 --- a/python-spec/src/somacore/experiment.py +++ b/python-spec/src/somacore/experiment.py @@ -8,16 +8,21 @@ from . import data from . import measurement from . import query +from . import scene _DF = TypeVar("_DF", bound=data.DataFrame) """An implementation of a DataFrame.""" _MeasColl = TypeVar("_MeasColl", bound=collection.Collection[measurement.Measurement]) """An implementation of a collection of Measurements.""" +_SceneColl = TypeVar("_SceneColl", bound=collection.Collection[scene.Scene]) +"""An implementation of a collection of spatial data.""" _RootSO = TypeVar("_RootSO", bound=base.SOMAObject) """The root SOMA object type of the implementation.""" -class Experiment(collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _RootSO]): +class Experiment( + collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _SceneColl, _RootSO] +): """A collection subtype representing an annotated 2D matrix of measurements. 
In single cell biology, this can represent multiple modes of measurement @@ -38,6 +43,7 @@ class Experiment(collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _Ro # somacore.Experiment[ # ImplDataFrame, # _DF # ImplMeasurement, # _MeasColl + # ImplScene, # _SceneColl # ImplSOMAObject, # _RootSO # ], # ): @@ -57,6 +63,9 @@ class Experiment(collection.BaseCollection[_RootSO], Generic[_DF, _MeasColl, _Ro ms = _mixin.item[_MeasColl]() """A collection of named measurements.""" + spatial = _mixin.item[_SceneColl]() # TODO: Discuss the name of this element. + """A collection of named spatial scenes.""" + def axis_query( self, measurement_name: str, diff --git a/python-spec/src/somacore/scene.py b/python-spec/src/somacore/scene.py new file mode 100644 index 00000000..4c9f959d --- /dev/null +++ b/python-spec/src/somacore/scene.py @@ -0,0 +1,444 @@ +"""Implementation of the SOMA scene collection for spatial data""" + +import abc +from typing import Any, Generic, Optional, Sequence, Tuple, TypeVar, Union + +import pyarrow as pa +from typing_extensions import Final + +from . import _mixin +from . import base +from . import collection +from . import coordinates +from . import options +from . import spatial + +_MultiscaleImage = TypeVar("_MultiscaleImage", bound=spatial.MultiscaleImage) +"""A particular implementation of a multiscale image.""" + +_PointCloud = TypeVar("_PointCloud", bound=spatial.PointCloud) +"""A particular implementation of a point cloud.""" + +_GeometryDataFrame = TypeVar("_GeometryDataFrame", bound=spatial.GeometryDataFrame) +"""A particular implementation of a geometry dataframe.""" + +_RootSO = TypeVar("_RootSO", bound=base.SOMAObject) +"""The root SomaObject type of the implementation.""" + + +class Scene( + collection.BaseCollection[_RootSO], + Generic[_MultiscaleImage, _PointCloud, _GeometryDataFrame, _RootSO], +): + """A collection subtype representing spatial assets that can all be stored + on a single coordinate space. 
+ + Lifecycle: experimental + """ + + # This class is implemented as a mixin to be used with SOMA classes. + # For example, a SOMA implementation would look like this: + # + # # This type-ignore comment will always be needed due to limitations + # # of type annotations; it is (currently) expected. + # class Scene( # type: ignore[type-var] + # ImplBaseCollection[ImplSOMAObject], + # somacore.Scene[ + # ImplMultiscaleImage, + # ImplPointCloud, + # ImplGeometryDataFrame, + # ImplSOMAObject, + # ], + # ): + # ... + + __slots__ = () + soma_type: Final = "SOMAScene" # type: ignore[misc] + + img = _mixin.item[collection.Collection[_MultiscaleImage]]() + """A collection of multiscale images backing the spatial data. + + Lifecycle: experimental + """ + + obsl = _mixin.item[collection.Collection[Union[_PointCloud, _GeometryDataFrame]]]() + """A collection of observation location data. + + This collection exists to store any spatial data in the scene that joins on the obs + ``soma_joinid``. Each dataframe in ``obsl`` can be either a PointCloud + or a GeometryDataFrame. + + Lifecycle: experimental + """ + + varl = _mixin.item[ + collection.Collection[ + collection.Collection[Union[_PointCloud, _GeometryDataFrame]] + ] + ]() + """A collection of collections of variable location data. + + This collection exists to store any spatial data in the scene that joins on the + variable ``soma_joinid`` for the measurements in the SOMA experiment. The top-level + collection maps from measurement name to a collection of dataframes. + + Each dataframe in a ``varl`` subcollection can be either a GeometryDataFrame or a + PointCloud. + + Lifecycle: experimental + """ + + @property + @abc.abstractmethod + def coordinate_space(self) -> Optional[coordinates.CoordinateSpace]: + """Coordinate system for this scene. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @coordinate_space.setter + @abc.abstractmethod + def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: + raise NotImplementedError() + + @abc.abstractmethod + def add_geometry_dataframe( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: str, + schema: pa.Schema, + index_column_names: Sequence[str] = ( + options.SOMA_JOINID, + options.SOMA_GEOMETRY, + ), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + context: Optional[Any] = None, + ) -> _GeometryDataFrame: + """Adds a ``GeometryDataFrame`` to the scene and sets a coordinate transform + between the scene and the dataframe. + + If the subcollection the geometry dataframe is inside of is more than one + layer deep, the input should be provided as a sequence of names. For example, + to add a geometry dataframe named "transcripts" to + the "var/RNA" collection:: + + scene.add_geometry_dataframe( + 'transcripts', subcollection=['var', 'RNA'], **kwargs + ) + + Args: + key: The name of the geometry dataframe. + transform: The coordinate transformation from the scene to the dataframe. + subcollection: The name, or sequence of names, of the subcollection the + dataframe is stored in. + + Returns: + The newly created ``GeometryDataFrame``, opened for writing. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def add_multiscale_image( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: str, + type: pa.DataType, + image_type: str = "CYX", # TODO: Replace this arg after PR #219 is merged + reference_level_shape: Sequence[int], + axis_names: Sequence[str] = ("c", "x", "y"), + ) -> _MultiscaleImage: + """Adds a ``MultiscaleImage`` to the scene and sets a coordinate transform + between the scene and the dataframe. + + Parameters are as in :meth:`spatial.MultiscaleImage.create`. + See :meth:`add_new_collection` for details about child URIs. + + Args: + key: The name of the multiscale image. + transform: The coordinate transformation from the scene to the image. + subcollection: The name, or sequence of names, of the subcollection the + image is stored in. + + Returns: + The newly created ``MultiscaleImage``, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def add_new_point_cloud( + self, + key: str, + subcollection: Union[str, Sequence[str]], + transform: Optional[coordinates.CoordinateTransform], + *, + uri: Optional[str] = None, + schema: pa.Schema, + index_column_names: Sequence[str] = (options.SOMA_JOINID,), + axis_names: Sequence[str] = ("x", "y"), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, + platform_config: Optional[options.PlatformConfig] = None, + ) -> _PointCloud: + """Adds a point cloud to the scene and sets a coordinate transform + between the scene and the dataframe. + + Parameters are as in :meth:`spatial.PointCloud.create`. + See :meth:`add_new_collection` for details about child URIs. + + Args: + key: The name of the point cloud. + transform: The coordinate transformation from the scene to the point cloud. 
+ subcollection: The name, or sequence of names, of the subcollection the + point cloud is stored in. + + Returns: + The newly created ``PointCloud``, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def set_transform_to_geometry_dataframe( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "obsl", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> _GeometryDataFrame: + """Adds the coordinate transform for the scene coordinate space to + a geometry dataframe stored in the scene. + + If the subcollection the geometry dataframe is inside of is more than one + layer deep, the input should be provided as a sequence of names. For example, + to set a transformation for a geometry dataframe named "transcripts" in the + "var/RNA" collection:: + + scene.set_transform_to_geometry_dataframe( + 'transcripts', transform, subcollection=['var', 'RNA'], + ) + + Args: + key: The name of the geometry dataframe. + transform: The coordinate transformation from the scene to the dataframe. + subcollection: The name, or sequence of names, of the subcollection the + dataframe is stored in. Defaults to ``'obsl'``. + coordinate_space: Optional coordinate space for the dataframe. This will + replace the existing coordinate space of the dataframe. + + Returns: + The geometry dataframe, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def set_transform_to_multiscale_image( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "img", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> _MultiscaleImage: + """Adds the coordinate transform for the scene coordinate space to + a multiscale image stored in the scene. 
+ + The transform to the multiscale image must be to the coordinate space + defined on the reference level for the image. In most cases, this will be + the level ``0`` image. + + Args: + key: The name of the multiscale image. + transform: The coordinate transformation from the scene to the reference + level of the multiscale image. + subcollection: The name, or sequence of names, of the subcollection the + image is stored in. Defaults to ``'img'``. + coordinate_space: Optional coordinate space for the image. This will + replace the existing coordinate space of the multiscale image. + + Returns: + The multiscale image, opened for writing. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def set_transform_to_point_cloud( + self, + key: str, + transform: coordinates.CoordinateTransform, + *, + subcollection: Union[str, Sequence[str]] = "obsl", + coordinate_space: Optional[coordinates.CoordinateSpace] = None, + ) -> _PointCloud: + """Adds the coordinate transform for the scene coordinate space to + a point cloud stored in the scene. + + If the subcollection the point cloud is inside of is more than one + layer deep, the input should be provided as a sequence of names. For example, + to set a transform for a point cloud named `transcripts` in the `var/RNA` + collection:: + + scene.set_transform_to_point_cloud( + 'transcripts', transform, subcollection=['var', 'RNA'], + ) + + Args: + key: The name of the point cloud. + transform: The coordinate transformation from the scene to the point cloud. + subcollection: The name, or sequence of names, of the subcollection the + point cloud is stored in. Defaults to ``'obsl'``. + coordinate_space: Optional coordinate space for the point cloud. This will + replace the existing coordinate space of the point cloud. Defaults to + ``None``. + + Returns: + The point cloud, opened for writing. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_from_geometry_dataframe( + self, key: str, *, subcollection: Union[str, Sequence[str]] = "obsl" + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the requested geometry dataframe + to the scene. + + Args: + key: The name of the geometry dataframe. + subcollection: The name, or sequence of names, of the subcollection the + dataframe is stored in. Defaults to ``'obsl'``. + + Returns: + Coordinate transform from the dataframe to the scene. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_from_multiscale_image( + self, + key: str, + *, + subcollection: str = "img", + level: Optional[Union[str, int]] = None, + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the requested multiscale image to + the scene. + + Args: + key: The name of the multiscale image. + subcollection: The name, or sequence of names, of the subcollection the + image is stored in. Defaults to ``'img'``. + level: The level of the image to get the transformation from. + Defaults to ``None`` -- the transformation will be from the reference + level. + + Returns: + Coordinate transform from the multiscale image to the scene. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_from_point_cloud( + self, key: str, *, subcollection: str = "obsl" + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the requested point cloud to + the scene. + + Args: + key: The name of the point cloud. + subcollection: The name, or sequence of names, of the subcollection the + point cloud is stored in. Defaults to ``'obsl'``. + + Returns: + Coordinate transform from the point cloud to the scene. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_to_geometry_dataframe( + self, key: str, *, subcollection: Union[str, Sequence[str]] = "obsl" + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the scene to a requested + geometry dataframe. + + Args: + key: The name of the geometry dataframe. + subcollection: The name, or sequence of names, of the subcollection the + dataframe is stored in. Defaults to ``'obsl'``. + + Returns: + Coordinate transform from the scene to the requested dataframe. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_to_multiscale_image( + self, + key: str, + *, + subcollection: str = "img", + level: Optional[Union[str, int]] = None, + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the scene to a requested + multiscale image. + + Args: + key: The name of the multiscale image. + subcollection: The name, or sequence of names, of the subcollection the + image is stored in. Defaults to ``'img'``. + level: The level of the image to get the transformation to. + Defaults to ``None`` -- the transformation will be to the reference + level. + + Returns: + Coordinate transform from the scene to the requested multiscale image. + + Lifecycle: experimental + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_transform_to_point_cloud( + self, key: str, *, subcollection: str = "obsl" + ) -> coordinates.CoordinateTransform: + """Returns the coordinate transformation from the scene to a requested + point cloud. + + Args: + key: The name of the point cloud. + subcollection: The name, or sequence of names, of the subcollection the + point cloud is stored in. Defaults to ``'obsl'``. + + Returns: + Coordinate transform from the scene to the requested point cloud. 
+ + Lifecycle: experimental + """ + raise NotImplementedError() diff --git a/python-spec/testing/test_collection.py b/python-spec/testing/test_collection.py index e0b8569b..e997c6d2 100644 --- a/python-spec/testing/test_collection.py +++ b/python-spec/testing/test_collection.py @@ -40,3 +40,5 @@ def test_method_resolution_order(self): self.assertEqual("SOMAMeasurement", m.soma_type) exp = ephemeral.Experiment() self.assertEqual("SOMAExperiment", exp.soma_type) + scene = ephemeral.Scene() + self.assertEqual("SOMAScene", scene.soma_type) From 73ccc066e73be4989f9e05d3e4953c516a48e3e8 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:03:23 -0400 Subject: [PATCH 10/17] Add spatial presence matrices (#221) This adds dataframes for creating a join table between the scenes and observations/variables. New methods are added to the `ExperimentAxisQuery` class for getting a Arrow array of scene names that relate to the obs or var in the query. --------- Co-authored-by: Aaron Wolen --- python-spec/src/somacore/experiment.py | 11 ++++++ python-spec/src/somacore/measurement.py | 11 ++++++ python-spec/src/somacore/query/query.py | 46 +++++++++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/python-spec/src/somacore/experiment.py b/python-spec/src/somacore/experiment.py index bb0d7c65..b570aa2a 100644 --- a/python-spec/src/somacore/experiment.py +++ b/python-spec/src/somacore/experiment.py @@ -66,6 +66,17 @@ class Experiment( spatial = _mixin.item[_SceneColl]() # TODO: Discuss the name of this element. """A collection of named spatial scenes.""" + obs_spatial_presence = _mixin.item[_DF]() + """A dataframe that stores the presence of obs in the spatial scenes. + + This provides a join table for the obs ``soma_joinid`` and the scene names used in + the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid`` + and ``scene_id``. The ``scene_id`` column must have type ``string``. 
The + dataframe must contain a ``boolean`` column ``soma_data``. The values of ``soma_data`` are + ``True`` if the obs ``soma_joinid`` is contained in the scene + ``scene_id`` and ``False`` otherwise. + """ + def axis_query( self, measurement_name: str, diff --git a/python-spec/src/somacore/measurement.py b/python-spec/src/somacore/measurement.py index e7d0c9f4..273b58f2 100644 --- a/python-spec/src/somacore/measurement.py +++ b/python-spec/src/somacore/measurement.py @@ -99,3 +99,14 @@ class Measurement( This is indexed by ``[varid_1, varid_2]``. """ + + var_spatial_presence = _mixin.item[_DF]() + """A dataframe that stores the presence of var in the spatial scenes. + + This provides a join table for the var ``soma_joinid`` and the scene names used in + the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid`` + and ``scene_id``. The ``scene_id`` column must have type ``string``. The + dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are + ``True`` if the var with varid ``soma_joinid`` is contained in scene with name + ``scene_id`` and ``False`` otherwise. + """ diff --git a/python-spec/src/somacore/query/query.py b/python-spec/src/somacore/query/query.py index ac22792b..947d8b0a 100644 --- a/python-spec/src/somacore/query/query.py +++ b/python-spec/src/somacore/query/query.py @@ -21,6 +21,7 @@ import numpy.typing as npt import pandas as pd import pyarrow as pa +import pyarrow.compute as pacomp from scipy import sparse from typing_extensions import Literal, Protocol, Self, TypedDict @@ -267,6 +268,48 @@ def varm(self, layer: str) -> data.SparseRead: """ return self._axism_inner(_Axis.VAR, layer) + def obs_scene_ids(self) -> pa.Array: + """Returns a pyarrow array with scene ids that contain obs from this + query. 
+ + Lifecycle: experimental + """ + try: + obs_scene = self.experiment.obs_spatial_presence + except KeyError as ke: + raise KeyError("Missing obs_scene") from ke + if not isinstance(obs_scene, data.DataFrame): + raise TypeError("obs_scene must be a dataframe.") + + full_table = obs_scene.read( + coords=((_Axis.OBS.getattr_from(self._joinids), slice(None))), + result_order=options.ResultOrder.COLUMN_MAJOR, + value_filter="data != 0", + ).concat() + + return pacomp.unique(full_table["scene_id"]) + + def var_scene_ids(self) -> pa.Array: + """Return a pyarrow array with scene ids that contain var from this + query. + + Lifecycle: experimental + """ + try: + var_scene = self._ms.var_spatial_presence + except KeyError as ke: + raise KeyError("Missing var_scene") from ke + if not isinstance(var_scene, data.DataFrame): + raise TypeError("var_scene must be a dataframe.") + + full_table = var_scene.read( + coords=((_Axis.OBS.getattr_from(self._joinids), slice(None))), + result_order=options.ResultOrder.COLUMN_MAJOR, + value_filter="data != 0", + ).concat() + + return pacomp.unique(full_table["scene_id"]) + def to_anndata( self, X_name: str, @@ -826,6 +869,9 @@ def obs(self) -> data.DataFrame: ... @property def context(self) -> Optional[base_types.ContextBase]: ... + @property + def obs_spatial_presence(self) -> data.DataFrame: ... + class _HasObsVar(Protocol[_T_co]): """Something which has an ``obs`` and ``var`` field. 
From aff0f0f19a8ad0265abce103cd393307e62c091a Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Mon, 30 Sep 2024 14:48:00 -0400 Subject: [PATCH 11/17] Small fixes to new spatial classes (#231) * Fix params for `add_multiscale_image` * Update `coordinate_space` property to not be optional - This property is always set on initialization for the `PointCloud`, `GeometryDataFrame`, and `MultiscaleImage` classes. * Remove nonexistent class `SpatialDataFrame` from `__all__` --- python-spec/src/somacore/__init__.py | 1 - python-spec/src/somacore/ephemeral/collections.py | 2 +- python-spec/src/somacore/scene.py | 2 +- python-spec/src/somacore/spatial.py | 6 +++--- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/python-spec/src/somacore/__init__.py b/python-spec/src/somacore/__init__.py index f16b3f8f..e34a3aab 100644 --- a/python-spec/src/somacore/__init__.py +++ b/python-spec/src/somacore/__init__.py @@ -71,7 +71,6 @@ "Scene", "ImageProperties", "MultiscaleImage", - "SpatialDataFrame", "GeometryDataFrame", "PointCloud", "BatchSize", diff --git a/python-spec/src/somacore/ephemeral/collections.py b/python-spec/src/somacore/ephemeral/collections.py index 583ed947..b3328b03 100644 --- a/python-spec/src/somacore/ephemeral/collections.py +++ b/python-spec/src/somacore/ephemeral/collections.py @@ -194,9 +194,9 @@ def add_multiscale_image( *, uri: str, type: pa.DataType, - image_type: str = "CYX", # TODO: Replace this arg after PR #219 is merged reference_level_shape: Sequence[int], axis_names: Sequence[str] = ("c", "x", "y"), + axis_types: Sequence[str] = ("channel", "height", "width"), ) -> spatial.MultiscaleImage: raise NotImplementedError() diff --git a/python-spec/src/somacore/scene.py b/python-spec/src/somacore/scene.py index 4c9f959d..b07b3147 100644 --- a/python-spec/src/somacore/scene.py +++ b/python-spec/src/somacore/scene.py @@ -154,9 +154,9 @@ def add_multiscale_image( *, uri: str, type: pa.DataType, - image_type: str = "CYX", # TODO: Replace this arg after PR
#219 is merged reference_level_shape: Sequence[int], axis_names: Sequence[str] = ("c", "x", "y"), + axis_types: Sequence[str] = ("channel", "height", "width"), ) -> _MultiscaleImage: """Adds a ``MultiscaleImage`` to the scene and sets a coordinate transform between the scene and the dataframe. diff --git a/python-spec/src/somacore/spatial.py b/python-spec/src/somacore/spatial.py index 3d739840..f0c93f3e 100644 --- a/python-spec/src/somacore/spatial.py +++ b/python-spec/src/somacore/spatial.py @@ -224,7 +224,7 @@ def index_column_names(self) -> Tuple[str, ...]: @property @abc.abstractmethod - def coordinate_space(self) -> Optional[coordinates.CoordinateSpace]: + def coordinate_space(self) -> coordinates.CoordinateSpace: """Coordinate space for this point cloud. Lifecycle: experimental @@ -470,7 +470,7 @@ def axis_names(self) -> Tuple[str, ...]: @property @abc.abstractmethod - def coordinate_space(self) -> Optional[coordinates.CoordinateSpace]: + def coordinate_space(self) -> coordinates.CoordinateSpace: """Coordinate space for this geometry dataframe. Lifecycle: experimental @@ -639,7 +639,7 @@ def axis_names(self) -> Tuple[str, ...]: @property @abc.abstractmethod - def coordinate_space(self) -> Optional[coordinates.CoordinateSpace]: + def coordinate_space(self) -> coordinates.CoordinateSpace: """Coordinate space for this multiscale image. 
Lifecycle: experimental From e92ecb49453e38c8eeb56922d14b70894f8ee98c Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:13:44 -0400 Subject: [PATCH 12/17] Rename PointCloud -> PointCloudDataFrame (#232) --- python-spec/src/somacore/__init__.py | 4 +- .../src/somacore/ephemeral/collections.py | 14 +++---- python-spec/src/somacore/scene.py | 38 ++++++++++--------- python-spec/src/somacore/spatial.py | 16 ++++---- 4 files changed, 39 insertions(+), 33 deletions(-) diff --git a/python-spec/src/somacore/__init__.py b/python-spec/src/somacore/__init__.py index e34a3aab..9afcd359 100644 --- a/python-spec/src/somacore/__init__.py +++ b/python-spec/src/somacore/__init__.py @@ -41,7 +41,7 @@ from .spatial import GeometryDataFrame from .spatial import ImageProperties from .spatial import MultiscaleImage -from .spatial import PointCloud +from .spatial import PointCloudDataFrame from .spatial import SpatialRead from .types import ContextBase @@ -72,7 +72,7 @@ "ImageProperties", "MultiscaleImage", "GeometryDataFrame", - "PointCloud", + "PointCloudDataFrame", "BatchSize", "IOfN", "ResultOrder", diff --git a/python-spec/src/somacore/ephemeral/collections.py b/python-spec/src/somacore/ephemeral/collections.py index b3328b03..9a53cea6 100644 --- a/python-spec/src/somacore/ephemeral/collections.py +++ b/python-spec/src/somacore/ephemeral/collections.py @@ -136,7 +136,7 @@ class Collection( # type: ignore[misc] # __eq__ false positive _BasicAbstractScene = scene.Scene[ spatial.MultiscaleImage, - spatial.PointCloud, + spatial.PointCloudDataFrame, spatial.GeometryDataFrame, base.SOMAObject, ] @@ -200,7 +200,7 @@ def add_multiscale_image( ) -> spatial.MultiscaleImage: raise NotImplementedError() - def add_new_point_cloud( + def add_new_point_cloud_dataframe( self, key: str, subcollection: Union[str, Sequence[str]], @@ -212,7 +212,7 @@ def add_new_point_cloud( axis_names: Sequence[str] = ("x", "y"), domain: 
Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, platform_config: Optional[options.PlatformConfig] = None, - ) -> spatial.PointCloud: + ) -> spatial.PointCloudDataFrame: raise NotImplementedError() def set_transform_to_geometry_dataframe( @@ -235,14 +235,14 @@ def set_transform_to_multiscale_image( ) -> spatial.MultiscaleImage: raise NotImplementedError() - def set_transform_to_point_cloud( + def set_transform_to_point_cloud_dataframe( self, key: str, transform: coordinates.CoordinateTransform, *, subcollection: Union[str, Sequence[str]] = "obsl", coordinate_space: Optional[coordinates.CoordinateSpace] = None, - ) -> spatial.PointCloud: + ) -> spatial.PointCloudDataFrame: raise NotImplementedError() def get_transform_from_geometry_dataframe( @@ -259,7 +259,7 @@ def get_transform_from_multiscale_image( ) -> coordinates.CoordinateTransform: raise NotImplementedError() - def get_transform_from_point_cloud( + def get_transform_from_point_cloud_dataframe( self, key: str, *, subcollection: str = "obsl" ) -> coordinates.CoordinateTransform: raise NotImplementedError() @@ -278,7 +278,7 @@ def get_transform_to_multiscale_image( ) -> coordinates.CoordinateTransform: raise NotImplementedError() - def get_transform_to_point_cloud( + def get_transform_to_point_cloud_dataframe( self, key: str, *, subcollection: str = "obsl" ) -> coordinates.CoordinateTransform: raise NotImplementedError() diff --git a/python-spec/src/somacore/scene.py b/python-spec/src/somacore/scene.py index b07b3147..9806a663 100644 --- a/python-spec/src/somacore/scene.py +++ b/python-spec/src/somacore/scene.py @@ -16,7 +16,9 @@ _MultiscaleImage = TypeVar("_MultiscaleImage", bound=spatial.MultiscaleImage) """A particular implementation of a multiscale image.""" -_PointCloud = TypeVar("_PointCloud", bound=spatial.PointCloud) +_PointCloudDataFrame = TypeVar( + "_PointCloudDataFrame", bound=spatial.PointCloudDataFrame +) """A particular implementation of a point cloud.""" _GeometryDataFrame = 
TypeVar("_GeometryDataFrame", bound=spatial.GeometryDataFrame) @@ -28,7 +30,7 @@ class Scene( collection.BaseCollection[_RootSO], - Generic[_MultiscaleImage, _PointCloud, _GeometryDataFrame, _RootSO], + Generic[_MultiscaleImage, _PointCloudDataFrame, _GeometryDataFrame, _RootSO], ): """A collection subtype representing spatial assets that can all be stored on a single coordinate space. @@ -45,7 +47,7 @@ class Scene( # ImplBaseCollection[ImplSOMAObject], # somacore.Scene[ # ImplMultiscaleImage, - # ImplPointCloud, + # ImplPointCloudDataFrame, # ImplGeometryDataFrame, # ImplSOMAObject, # ], @@ -61,11 +63,13 @@ class Scene( Lifecycle: experimental """ - obsl = _mixin.item[collection.Collection[Union[_PointCloud, _GeometryDataFrame]]]() + obsl = _mixin.item[ + collection.Collection[Union[_PointCloudDataFrame, _GeometryDataFrame]] + ]() """A collection of observation location data. This collection exists to store any spatial data in the scene that joins on the obs - ``soma_joinid``. Each dataframe in ``obsl`` can be either a PointCloud + ``soma_joinid``. Each dataframe in ``obsl`` can be either a PointCloudDataFrame or a GeometryDataFrame. Lifecycle: experimental @@ -73,7 +77,7 @@ class Scene( varl = _mixin.item[ collection.Collection[ - collection.Collection[Union[_PointCloud, _GeometryDataFrame]] + collection.Collection[Union[_PointCloudDataFrame, _GeometryDataFrame]] ] ]() """A collection of collections of variable location data. @@ -83,7 +87,7 @@ class Scene( collection maps from measurement name to a collection of dataframes. Each dataframe in a ``varl`` subcollection can be either a GeometryDataFrame or a - PointCloud. + PointCloudDataFrame. Lifecycle: experimental """ @@ -161,7 +165,7 @@ def add_multiscale_image( """Adds a ``MultiscaleImage`` to the scene and sets a coordinate transform between the scene and the dataframe. - Parameters are as in :meth:`spatial.PointCloud.create`. + Parameters are as in :meth:`spatial.PointCloudDataFrame.create`. 
See :meth:`add_new_collection` for details about child URIs. Args: @@ -178,7 +182,7 @@ def add_multiscale_image( raise NotImplementedError() @abc.abstractmethod - def add_new_point_cloud( + def add_new_point_cloud_dataframe( self, key: str, subcollection: Union[str, Sequence[str]], @@ -190,11 +194,11 @@ def add_new_point_cloud( axis_names: Sequence[str] = ("x", "y"), domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, platform_config: Optional[options.PlatformConfig] = None, - ) -> _PointCloud: + ) -> _PointCloudDataFrame: """Adds a point cloud to the scene and sets a coordinate transform between the scene and the dataframe. - Parameters are as in :meth:`spatial.PointCloud.create`. + Parameters are as in :meth:`spatial.PointCloudDataFrame.create`. See :meth:`add_new_collection` for details about child URIs. Args: @@ -204,7 +208,7 @@ def add_new_point_cloud( dataframe is stored in. Defaults to ``'obsl'``. Returns: - The newly created ``PointCloud``, opened for writing. + The newly created ``PointCloudDataFrame``, opened for writing. Lifecycle: experimental """ @@ -279,14 +283,14 @@ def set_transform_to_multiscale_image( raise NotImplementedError() @abc.abstractmethod - def set_transform_to_point_cloud( + def set_transform_to_point_cloud_dataframe( self, key: str, transform: coordinates.CoordinateTransform, *, subcollection: Union[str, Sequence[str]] = "obsl", coordinate_space: Optional[coordinates.CoordinateSpace] = None, - ) -> _PointCloud: + ) -> _PointCloudDataFrame: """Adds the coordinate transform for the scene coordinate space to a point cloud stored in the scene. 
@@ -295,7 +299,7 @@ def set_transform_to_point_cloud( to set a transform for a point named `transcripts` in the `var/RNA` collection:: - scene.set_transformation_for_point_cloud( + scene.set_transform_to_point_cloud_dataframe( 'transcripts', transform, subcollection=['var', 'RNA'], ) @@ -361,7 +365,7 @@ def get_transform_from_multiscale_image( raise NotImplementedError() @abc.abstractmethod - def get_transform_from_point_cloud( + def get_transform_from_point_cloud_dataframe( self, key: str, *, subcollection: str = "obsl" ) -> coordinates.CoordinateTransform: """Returns the coordinate transformation from the requested point cloud to @@ -425,7 +429,7 @@ def get_transform_to_multiscale_image( raise NotImplementedError() @abc.abstractmethod - def get_transform_to_point_cloud( + def get_transform_to_point_cloud_dataframe( self, key: str, *, subcollection: str = "obsl" ) -> coordinates.CoordinateTransform: """Returns the coordinate transformation from the scene to a requested diff --git a/python-spec/src/somacore/spatial.py b/python-spec/src/somacore/spatial.py index f0c93f3e..ba6244e4 100644 --- a/python-spec/src/somacore/spatial.py +++ b/python-spec/src/somacore/spatial.py @@ -34,18 +34,20 @@ _ReadData = TypeVar("_ReadData") -class PointCloud(base.SOMAObject, metaclass=abc.ABCMeta): - """A specialized SOMA DataFrame for storing collections of points in multi-dimensional space. +class PointCloudDataFrame(base.SOMAObject, metaclass=abc.ABCMeta): + """A specialized SOMA DataFrame for storing collections of points in + multi-dimensional space. - The ``PointCloud`` class is designed to efficiently store and query point data, where each - point is represented by coordinates in one or more spatial dimensions (e.g., x, y, z) and - may have additional columns for associated attributes.
+ The ``PointCloudDataFrame`` class is designed to efficiently store and query point + data, where each point is represented by coordinates in one or more spatial + dimensions (e.g., x, y, z) and may have additional columns for associated + attributes. Lifecycle: experimental """ __slots__ = () - soma_type: Final = "SOMAPointCloud" # type: ignore[misc] + soma_type: Final = "SOMAPointCloudDataFrame" # type: ignore[misc] @classmethod @abc.abstractmethod @@ -60,7 +62,7 @@ def create( platform_config: Optional[options.PlatformConfig] = None, context: Optional[Any] = None, ) -> Self: - """Creates a new ``PointCloud`` at the given URI. + """Creates a new ``PointCloudDataFrame`` at the given URI. The schema of the created point cloud will include a column named ``soma_joinid`` of type ``pyarrow.int64``, with negative values disallowed, and From c840b73b70056e5391cfab8a99bca048b7fff0e8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 3 Oct 2024 12:07:49 -0400 Subject: [PATCH 13/17] Sync `domain` argument between `Collection.add_new_dataframe` and `DataFrame.create` (#233) --- python-spec/src/somacore/collection.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/python-spec/src/somacore/collection.py b/python-spec/src/somacore/collection.py index e0ac269c..8ff2877e 100644 --- a/python-spec/src/somacore/collection.py +++ b/python-spec/src/somacore/collection.py @@ -1,5 +1,14 @@ import abc -from typing import Any, MutableMapping, Optional, Sequence, Type, TypeVar, overload +from typing import ( + Any, + MutableMapping, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + overload, +) import pyarrow as pa from typing_extensions import Final, Self @@ -145,6 +154,7 @@ def add_new_dataframe( uri: Optional[str] = None, schema: pa.Schema, index_column_names: Sequence[str] = (options.SOMA_JOINID,), + domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, platform_config: Optional[options.PlatformConfig] = None, ) -> data.DataFrame: 
"""Creates a new DataFrame as a child of this collection. From edd8ab28979a494822e828da8efce72ee2933cb7 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Thu, 3 Oct 2024 13:13:48 -0400 Subject: [PATCH 14/17] Fix typos in Scene `add_*` methods (#234) --- python-spec/src/somacore/scene.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python-spec/src/somacore/scene.py b/python-spec/src/somacore/scene.py index 9806a663..d0c38526 100644 --- a/python-spec/src/somacore/scene.py +++ b/python-spec/src/somacore/scene.py @@ -159,7 +159,7 @@ def add_multiscale_image( uri: str, type: pa.DataType, reference_level_shape: Sequence[int], - axis_names: Sequence[str] = ("c", "x", "y"), + axis_names: Sequence[str] = ("c", "y", "x"), axis_types: Sequence[str] = ("channel", "height", "width"), ) -> _MultiscaleImage: """Adds a ``MultiscaleImage`` to the scene and sets a coordinate transform @@ -169,7 +169,7 @@ def add_multiscale_image( See :meth:`add_new_collection` for details about child URIs. Args: - key: The name of the geometry dataframe. + key: The name of the multiscale image. transform: The coordinate transformation from the scene to the dataframe. subcollection: The name, or sequence of names, of the subcollection the dataframe is stored in. Defaults to ``'obsl'``. @@ -202,7 +202,7 @@ def add_new_point_cloud_dataframe( See :meth:`add_new_collection` for details about child URIs. Args: - key: The name of the geometry dataframe. + key: The name of the point cloud dataframe. transform: The coordinate transformation from the scene to the dataframe. subcollection: The name, or sequence of names, of the subcollection the dataframe is stored in. Defaults to ``'obsl'``. 
From eee3130badf7362099cc07e30aa74e99c6061f37 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Mon, 14 Oct 2024 09:58:50 -0400 Subject: [PATCH 15/17] Coordinate quality of life improvements (#235) * Add `__repr__` methods to `CoordinateTransform` methods * Add `from_axis_names` creation method to `CoordinateSpace` --- python-spec/src/somacore/coordinates.py | 41 ++++++++++++++++++++++++- python-spec/testing/test_coordinates.py | 8 +++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/python-spec/src/somacore/coordinates.py b/python-spec/src/somacore/coordinates.py index 2703cb24..bf0d1c79 100644 --- a/python-spec/src/somacore/coordinates.py +++ b/python-spec/src/somacore/coordinates.py @@ -2,11 +2,13 @@ import abc import collections.abc -from typing import Optional, Sequence, Tuple, Union +import itertools +from typing import Iterable, Optional, Sequence, Tuple, Union import attrs import numpy as np import numpy.typing as npt +from typing_extensions import Self from .types import str_or_seq_length from .types import to_string_tuple @@ -37,6 +39,10 @@ class CoordinateSpace(collections.abc.Sequence[Axis]): axes: Tuple[Axis, ...] = attrs.field(converter=tuple) + @classmethod + def from_axis_names(cls, axis_names: Sequence[str,]) -> Self: + return cls(tuple(Axis(name) for name in axis_names)) # type: ignore[misc] + @axes.validator def _validate(self, _, axes: Tuple[Axis, ...]) -> None: if not axes: @@ -95,6 +101,25 @@ def _check_rmatmul_inner_axes(self, other: "CoordinateTransform"): f"{type(self).__name__}." 
) + @abc.abstractmethod + def _contents_lines(self) -> Iterable[str]: + return + yield + + def _my_repr(self) -> Iterable[str]: + yield f"{type(self).__name__}" + yield f" input axes: {self._input_axes}" + yield f" output axes: {self._output_axes}" + + def __repr__(self) -> str: + content = self._contents_lines + lines = ( + self._my_repr() + if content is None + else itertools.chain(self._my_repr(), self._contents_lines()) + ) + return "<" + "\n".join(lines) + ">" + @abc.abstractmethod def __matmul__(self, other: object) -> "CoordinateTransform": raise NotImplementedError() @@ -185,6 +210,10 @@ def __init__( f"Unexpected shape {self._matrix.shape} for the input affine matrix." ) + def _contents_lines(self) -> Iterable[str]: + yield " augmented matrix:" + yield " " + str(self._matrix).replace("\n", "\n ") + def __matmul__(self, other: object) -> CoordinateTransform: if not isinstance(other, CoordinateTransform): raise NotImplementedError( @@ -259,6 +288,9 @@ def __init__( super().__init__(input_axes, output_axes, np.diag(self._scale_factors)) + def _contents_lines(self) -> Iterable[str]: + yield f" scales: {self._scale_factors}" + def __matmul__(self, other: object) -> CoordinateTransform: if not isinstance(other, CoordinateTransform): raise NotImplementedError( @@ -312,6 +344,9 @@ def __init__( rank = str_or_seq_length(input_axes) super().__init__(input_axes, output_axes, rank * [self._scale]) + def _contents_lines(self) -> Iterable[str]: + yield f" scale: {self._scale}" + def __matmul__(self, other: object) -> CoordinateTransform: if not isinstance(other, CoordinateTransform): raise NotImplementedError( @@ -361,6 +396,10 @@ def __init__( ): super().__init__(input_axes, output_axes, 1) + def _contents_lines(self) -> Iterable[str]: + return + yield + def __matmul__(self, other: object) -> CoordinateTransform: if not isinstance(other, CoordinateTransform): raise NotImplementedError( diff --git a/python-spec/testing/test_coordinates.py 
b/python-spec/testing/test_coordinates.py index 1bccb8a2..e2cb2f75 100644 --- a/python-spec/testing/test_coordinates.py +++ b/python-spec/testing/test_coordinates.py @@ -39,6 +39,14 @@ def test_coordinate_space(): assert coord_space[0] == Axis("x", unit="nanometer") +def test_coordiante_space_from_axis_names(): + coord_space = CoordinateSpace.from_axis_names(["alpha", "beta"]) + assert len(coord_space) == 2 + assert coord_space.axis_names == ("alpha", "beta") + assert coord_space[0] == Axis("alpha", unit=None) + assert coord_space[1] == Axis("beta", unit=None) + + @pytest.mark.parametrize( ("input", "expected"), [ From 9a314923095bdbd4bd5778d1784ceb7976409899 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:51:20 -0400 Subject: [PATCH 16/17] [python] Update `Scene` `add_*` methods (#236) * Rename `add_*` -> `add_new_*` for consistency with `Collection` methods. * Change type specific keywords to be `**kwargs` and refer to class create methods for specific keywords. * Make `transform` a keyword argument. --- .../src/somacore/ephemeral/collections.py | 45 +--------- python-spec/src/somacore/scene.py | 85 ++++++++++--------- 2 files changed, 48 insertions(+), 82 deletions(-) diff --git a/python-spec/src/somacore/ephemeral/collections.py b/python-spec/src/somacore/ephemeral/collections.py index 9a53cea6..07629bde 100644 --- a/python-spec/src/somacore/ephemeral/collections.py +++ b/python-spec/src/somacore/ephemeral/collections.py @@ -5,12 +5,10 @@ NoReturn, Optional, Sequence, - Tuple, TypeVar, Union, ) -import pyarrow as pa from typing_extensions import Literal, Self from .. 
import base @@ -167,51 +165,14 @@ def coordinate_space(self) -> coordinates.CoordinateSpace: def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: raise NotImplementedError() - def add_geometry_dataframe( - self, - key: str, - subcollection: Union[str, Sequence[str]], - transform: Optional[coordinates.CoordinateTransform], - *, - uri: str, - schema: pa.Schema, - index_column_names: Sequence[str] = ( - options.SOMA_JOINID, - options.SOMA_GEOMETRY, - ), - axis_names: Sequence[str] = ("x", "y"), - domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, - platform_config: Optional[options.PlatformConfig] = None, - context: Optional[Any] = None, - ) -> spatial.GeometryDataFrame: + def add_new_geometry_dataframe(self, *args, **kwargs) -> spatial.GeometryDataFrame: raise NotImplementedError() - def add_multiscale_image( - self, - key: str, - subcollection: Union[str, Sequence[str]], - transform: Optional[coordinates.CoordinateTransform], - *, - uri: str, - type: pa.DataType, - reference_level_shape: Sequence[int], - axis_names: Sequence[str] = ("c", "x", "y"), - axis_types: Sequence[str] = ("channel", "height", "width"), - ) -> spatial.MultiscaleImage: + def add_new_multiscale_image(self, *args, **kwargs) -> spatial.MultiscaleImage: raise NotImplementedError() def add_new_point_cloud_dataframe( - self, - key: str, - subcollection: Union[str, Sequence[str]], - transform: Optional[coordinates.CoordinateTransform], - *, - uri: Optional[str] = None, - schema: pa.Schema, - index_column_names: Sequence[str] = (options.SOMA_JOINID,), - axis_names: Sequence[str] = ("x", "y"), - domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, - platform_config: Optional[options.PlatformConfig] = None, + self, *args, **kwargs ) -> spatial.PointCloudDataFrame: raise NotImplementedError() diff --git a/python-spec/src/somacore/scene.py b/python-spec/src/somacore/scene.py index d0c38526..d00cb074 100644 --- a/python-spec/src/somacore/scene.py +++ 
b/python-spec/src/somacore/scene.py @@ -1,16 +1,14 @@ """Implementation of the SOMA scene collection for spatial data""" import abc -from typing import Any, Generic, Optional, Sequence, Tuple, TypeVar, Union +from typing import Generic, Optional, Sequence, TypeVar, Union -import pyarrow as pa from typing_extensions import Final from . import _mixin from . import base from . import collection from . import coordinates -from . import options from . import spatial _MultiscaleImage = TypeVar("_MultiscaleImage", bound=spatial.MultiscaleImage) @@ -107,40 +105,38 @@ def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: raise NotImplementedError() @abc.abstractmethod - def add_geometry_dataframe( + def add_new_geometry_dataframe( self, key: str, subcollection: Union[str, Sequence[str]], - transform: Optional[coordinates.CoordinateTransform], *, - uri: str, - schema: pa.Schema, - index_column_names: Sequence[str] = ( - options.SOMA_JOINID, - options.SOMA_GEOMETRY, - ), - axis_names: Sequence[str] = ("x", "y"), - domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, - platform_config: Optional[options.PlatformConfig] = None, - context: Optional[Any] = None, + transform: Optional[coordinates.CoordinateTransform], + uri: Optional[str] = ..., + **kwargs, ) -> _GeometryDataFrame: """Adds a ``GeometryDataFrame`` to the scene and sets a coordinate transform between the scene and the dataframe. - If the subcollection the geometry dataframe is inside of is more than one - layer deep, the input should be provided as a sequence of names. For example, - to set the transformation to a geometry dataframe named "transcripts" in - the "var/RNA" collection:: + If the subcollection the geometry dataframe will be created inside of is more + than one layer deep, the input should be provided as a sequence of names. 
For + example, to add a new geometry dataframe named "transcripts" in the "var/RNA" + collection:: - scene.add_geometry_dataframe( - 'cell_boundaries', subcollection=['var', 'RNA'], **kwargs + scene.add_new_geometry_dataframe( + 'transcripts', subcollection=['var', 'RNA'], **kwargs ) + See :meth:`add_new_collection` for details about child URIs. + Args: key: The name of the geometry dataframe. - transform: The coordinate transformation from the scene to the dataframe. subcollection: The name, or sequence of names, of the subcollection the dataframe is stored in. Defaults to ``'obsl'``. + transform: The coordinate transformation from the scene to the dataframe. + uri: If provided, overrides the default URI that would be used to create + this object. This may be absolute or relative. + kwargs: Additional keyword arguments as specified in + :meth:`spatial.GeometryDataFrame.create`. Returns: The newly create ``GeometryDataFrame``, opened for writing. @@ -150,29 +146,29 @@ def add_geometry_dataframe( raise NotImplementedError() @abc.abstractmethod - def add_multiscale_image( + def add_new_multiscale_image( self, key: str, subcollection: Union[str, Sequence[str]], - transform: Optional[coordinates.CoordinateTransform], *, - uri: str, - type: pa.DataType, - reference_level_shape: Sequence[int], - axis_names: Sequence[str] = ("c", "y", "x"), - axis_types: Sequence[str] = ("channel", "height", "width"), + transform: Optional[coordinates.CoordinateTransform], + uri: Optional[str] = ..., + **kwargs, ) -> _MultiscaleImage: """Adds a ``MultiscaleImage`` to the scene and sets a coordinate transform between the scene and the dataframe. - Parameters are as in :meth:`spatial.PointCloudDataFrame.create`. See :meth:`add_new_collection` for details about child URIs. Args: key: The name of the multiscale image. - transform: The coordinate transformation from the scene to the dataframe. subcollection: The name, or sequence of names, of the subcollection the dataframe is stored in.
Defaults to ``'obsl'``. + transform: The coordinate transformation from the scene to the dataframe. + uri: If provided, overrides the default URI that would be used to create + this object. This may be absolute or relative. + kwargs: Additional keyword arguments as specified in + :meth:`spatial.MultiscaleImage.create`. Returns: The newly create ``MultiscaleImage``, opened for writing. @@ -186,26 +182,35 @@ def add_new_point_cloud_dataframe( self, key: str, subcollection: Union[str, Sequence[str]], - transform: Optional[coordinates.CoordinateTransform], *, - uri: Optional[str] = None, - schema: pa.Schema, - index_column_names: Sequence[str] = (options.SOMA_JOINID,), - axis_names: Sequence[str] = ("x", "y"), - domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, - platform_config: Optional[options.PlatformConfig] = None, + transform: Optional[coordinates.CoordinateTransform], + uri: Optional[str] = ..., + **kwargs, ) -> _PointCloudDataFrame: """Adds a point cloud to the scene and sets a coordinate transform between the scene and the dataframe. - Parameters are as in :meth:`spatial.PointCloudDataFrame.create`. + If the subcollection the point cloud dataframe will be added to is more than + one layer deep, the input should be provided as a sequence of names. For + example, to add a new point cloud dataframe named "transcripts" to the + "var/RNA" collection:: + + scene.add_new_point_cloud_dataframe( + 'transcripts', subcollection=['var', 'RNA'], **kwargs + ) + + See :meth:`add_new_collection` for details about child URIs. Args: key: The name of the point cloud dataframe. - transform: The coordinate transformation from the scene to the dataframe. subcollection: The name, or sequence of names, of the subcollection the dataframe is stored in. Defaults to ``'obsl'``. + transform: The coordinate transformation from the scene to the dataframe. + uri: If provided, overrides the default URI that would be used to create + this object.
This may be absolute or relative. + kwargs: Additional keyword arguments as specified in + :meth:`spatial.PointCloudDataFrame.create`. Returns: The newly created ``PointCloudDataFrame``, opened for writing. From aba232b837626b089151aba577425c70112a52b6 Mon Sep 17 00:00:00 2001 From: Julia Dark <24235303+jp-dark@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:08:44 -0400 Subject: [PATCH 17/17] Replace `axis_names` with `coordinate_space` in spatial dataframes (#237) Directly use the `CoordinateSpace` class instead of a tuple of "axis names" in the creation methods for the `PointCloudDataFrame` and `GeometryDataFrame` classes. --- python-spec/src/somacore/spatial.py | 46 +++++++++++------------------ 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/python-spec/src/somacore/spatial.py b/python-spec/src/somacore/spatial.py index ba6244e4..05395921 100644 --- a/python-spec/src/somacore/spatial.py +++ b/python-spec/src/somacore/spatial.py @@ -56,8 +56,11 @@ def create( uri: str, *, schema: pa.Schema, - index_column_names: Sequence[str] = (options.SOMA_JOINID, "x", "y"), - axis_names: Sequence[str] = ("x", "y"), + coordinate_space: Union[Sequence[str], coordinates.CoordinateSpace] = ( + "x", + "y", + ), + index_column_names: Optional[Sequence[str]] = None, domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, platform_config: Optional[options.PlatformConfig] = None, context: Optional[Any] = None, @@ -77,12 +80,13 @@ def create( must define all columns, including columns to be named as index columns. If the schema includes types unsupported by the SOMA implementation, an error will be raised. + coordinate_space: Either the coordinate space or the axis names for the + coordinate space the point cloud is defined on. index_column_names: A list of column names to use as user-defined index - columns (e.g., ``['x', 'y']``). All named columns must exist in the - schema, and at least one index column name is required.
- axis_names: An ordered list of axis column names that correspond to the - names of axes of the the coordinate space the points are defined on. - Must be the name of index columns. + columns (e.g., ``['x', 'y']``). Must include the axis names for all + axes in the coordinate space. May include the ``soma_joinid``. + Defaults to ``None`` which sets the index column names to be the + ``soma_joinid`` followed by the axis names for the coordinate space. domain: An optional sequence of tuples specifying the domain of each index column. Each tuple should be a pair consisting of the minimum and maximum values storable in the index column. If omitted entirely, @@ -242,15 +246,6 @@ def coordinate_space(self, value: coordinates.CoordinateSpace) -> None: """ raise NotImplementedError() - @property - @abc.abstractmethod - def axis_names(self) -> Tuple[str, ...]: - """The names of the axes of the coordinate space the data is defined on. - - Lifecycle: experimental - """ - raise NotImplementedError() - @property @abc.abstractmethod def domain(self) -> Tuple[Tuple[Any, Any], ...]: @@ -285,11 +280,14 @@ def create( uri: str, *, schema: pa.Schema, + coordinate_space: Union[Sequence[str], coordinates.CoordinateSpace] = ( + "x", + "y", + ), index_column_names: Sequence[str] = ( options.SOMA_JOINID, options.SOMA_GEOMETRY, ), - axis_names: Sequence[str] = ("x", "y"), domain: Optional[Sequence[Optional[Tuple[Any, Any]]]] = None, platform_config: Optional[options.PlatformConfig] = None, context: Optional[Any] = None, @@ -311,13 +309,12 @@ def create( must define all columns, including columns to be named as index columns. If the schema includes types unsupported by the SOMA implementation, an error will be raised. + coordinate_space: Either the coordinate space or the axis names for the + coordinate space the geometries are defined on. index_column_names: A list of column names to use as user-defined index columns (e.g., ``['cell_type', 'tissue_type']``).
All named columns must exist in the schema, and at least one index column name is required. - axis_names: An ordered list of axis column names that correspond to the - names of the axes of the coordinate space the geometries are defined - on. domain: An optional sequence of tuples specifying the domain of each index column. Two tuples must be provided for the ``soma_geometry`` column which store the width followed by the height. Each tuple should @@ -461,15 +458,6 @@ def index_column_names(self) -> Tuple[str, ...]: """ raise NotImplementedError() - @property - @abc.abstractmethod - def axis_names(self) -> Tuple[str, ...]: - """The names of the axes of the coordinate space the data is defined on. - - Lifecycle: experimental - """ - raise NotImplementedError() - @property @abc.abstractmethod def coordinate_space(self) -> coordinates.CoordinateSpace: