From 40d27b985ef422096b0d05a8162f9646ff7005d8 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 28 Jan 2024 14:04:22 -0800 Subject: [PATCH 01/48] initial sharding compatible version using cloudvolume --- python/neuroglancer/write_annotations.py | 101 ++++++++++++++++------- 1 file changed, 70 insertions(+), 31 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 272cd80cf..83ae0fe67 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -14,6 +14,7 @@ least one file written per annotation. """ +from collections import defaultdict import json import numbers import os @@ -21,11 +22,22 @@ import struct from collections.abc import Sequence from typing import Literal, NamedTuple, Optional, Union, cast +from cloudvolume.datasource.precomputed.sharding import ShardingSpecification, synthesize_shard_files import numpy as np from . import coordinate_space, viewer_state +class NumpyEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + if isinstance(obj, np.integer): + return int(obj) + if isinstance(obj, np.floating): + return float(obj) + return json.JSONEncoder.default(self, obj) + class Annotation(NamedTuple): id: int @@ -87,26 +99,34 @@ def __init__( self, coordinate_space: coordinate_space.CoordinateSpace, annotation_type: AnnotationType, + lower_bound: Sequence, relationships: Sequence[str] = (), properties: Sequence[viewer_state.AnnotationPropertySpec] = (), + chunk_size: Sequence[int] = [256, 256, 256], + id_sharding_spec: ShardingSpecification = None ): + self.chunk_size = np.array(chunk_size) self.coordinate_space = coordinate_space self.relationships = list(relationships) self.annotation_type = annotation_type self.properties = list(properties) + self.annotations_by_chunk = defaultdict(list) self.properties.sort(key=lambda p: -_PROPERTY_DTYPES[p.type][1]) self.annotations = [] self.rank = coordinate_space.rank self.dtype = _get_dtype_for_geometry( annotation_type, coordinate_space.rank ) + _get_dtype_for_properties(self.properties) - self.lower_bound = np.full( - shape=(self.rank,), fill_value=float("inf"), dtype=np.float32 - ) + self.lower_bound = np.array(lower_bound, dtype=np.float32) + assert(len(self.lower_bound) == self.rank) self.upper_bound = np.full( shape=(self.rank,), fill_value=float("-inf"), dtype=np.float32 ) self.related_annotations = [{} for _ in self.relationships] + self.id_sharding_spec = id_sharding_spec + + def get_chunk_index(self, coords): + return tuple(((coords-self.lower_bound) // self.chunk_size).astype(np.int32)) def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): if self.annotation_type != "point": @@ -118,7 +138,7 @@ def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): f"Expected point to have length {self.coordinate_space.rank}, but received: {len(point)}" ) - self.lower_bound = np.minimum(self.lower_bound, point) + #self.lower_bound = np.minimum(self.lower_bound, point) self.upper_bound = np.maximum(self.upper_bound, point) self._add_obj(point, id, **kwargs) @@ -165,7 +185,7 @@ def _add_two_point_obj( f"Expected coordinates to have length {self.coordinate_space.rank}, but received: {len(point_b)}" ) - self.lower_bound = np.minimum(self.lower_bound, point_a) + #self.lower_bound = np.minimum(self.lower_bound, point_a) self.upper_bound = np.maximum(self.upper_bound, point_b) coords = np.concatenate((point_a, point_b)) 
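# Both endpoints end up in a single record: the "geometry" field built by
# _get_dtype_for_geometry holds 2 * rank float32 values for two-point types.
# Rough illustration (not from the patch) for a rank-3 line from (1, 2, 3)
# to (4, 5, 6):
#   coords == np.array([1., 2., 3., 4., 5., 6.], dtype=np.float32)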
self._add_obj(cast(Sequence[float], coords), id, **kwargs) @@ -197,14 +217,22 @@ def _add_obj(self, coords: Sequence[float], id: Optional[int], **kwargs): id=id, encoded=encoded.tobytes(), relationships=related_ids ) + chunk_index = self.get_chunk_index(np.array(coords[:self.rank])) + self.annotations_by_chunk[chunk_index].append(annotation) self.annotations.append(annotation) - for i, segment_ids in enumerate(related_ids): for segment_id in segment_ids: rel_index = self.related_annotations[i] rel_index_list = rel_index.setdefault(segment_id, []) rel_index_list.append(annotation) + def _serialize_annotations_sharded(self, path, annotations: list[Annotation], shard_spec: ShardingSpecification): + ann_dict_encoding = {a.id: a.encoded for a in annotations} + shard_files = synthesize_shard_files(shard_spec, ann_dict_encoding) + for shard_id, shard_file in shard_files.items(): + with open(os.path.join(path, f"{shard_id}"), "wb") as f: + f.write(shard_file) + def _serialize_annotations(self, f, annotations: list[Annotation]): f.write(struct.pack(" Date: Tue, 30 Jan 2024 08:58:15 -0800 Subject: [PATCH 02/48] make cloud volume import optional --- python/neuroglancer/write_annotations.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 83ae0fe67..7d4aaffb7 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -22,7 +22,21 @@ import struct from collections.abc import Sequence from typing import Literal, NamedTuple, Optional, Union, cast -from cloudvolume.datasource.precomputed.sharding import ShardingSpecification, synthesize_shard_files +from logging import warning +try: + from cloudvolume.datasource.precomputed.sharding import ( + ShardingSpecification, synthesize_shard_files + ) +except ImportError: + class ShardingSpecification: + def __init__(self, *args, **kwargs): + raise NotImplementedError("cloudvolume is not installed") + + def synthesize_shard_files(*args, **kwargs): + raise NotImplementedError("cloudvolume is not installed") + + warning("cloudvolume is not installed, so sharding is not supported." 
+ "pip install cloud-volume to install") import numpy as np @@ -125,6 +139,7 @@ def __init__( self.related_annotations = [{} for _ in self.relationships] self.id_sharding_spec = id_sharding_spec + def get_chunk_index(self, coords): return tuple(((coords-self.lower_bound) // self.chunk_size).astype(np.int32)) @@ -140,6 +155,7 @@ def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): #self.lower_bound = np.minimum(self.lower_bound, point) self.upper_bound = np.maximum(self.upper_bound, point) + self.kdtree.add(point) self._add_obj(point, id, **kwargs) def add_axis_aligned_bounding_box( From 04e623b722f34b0e493dfb2945759dc8a4bfd619 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 31 Jan 2024 10:37:36 -0800 Subject: [PATCH 03/48] changed to write via tensorstore --- python/neuroglancer/write_annotations.py | 121 +++++++++++++++++------ 1 file changed, 89 insertions(+), 32 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 7d4aaffb7..6b4e4cdd5 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -23,21 +23,7 @@ from collections.abc import Sequence from typing import Literal, NamedTuple, Optional, Union, cast from logging import warning -try: - from cloudvolume.datasource.precomputed.sharding import ( - ShardingSpecification, synthesize_shard_files - ) -except ImportError: - class ShardingSpecification: - def __init__(self, *args, **kwargs): - raise NotImplementedError("cloudvolume is not installed") - - def synthesize_shard_files(*args, **kwargs): - raise NotImplementedError("cloudvolume is not installed") - - warning("cloudvolume is not installed, so sharding is not supported." - "pip install cloud-volume to install") - +import tensorstore as ts import numpy as np from . 
import coordinate_space, viewer_state @@ -74,8 +60,50 @@ class Annotation(NamedTuple): } AnnotationType = Literal["point", "line", "axis_aligned_bounding_box", "ellipsoid"] +ShardHashType = Literal["murmurhash3_x86_128", "identity_hash"] + +MINISHARD_TARGET_COUNT = 1000 +SHARD_TARGET_SIZE = 50000000 +def choose_output_spec(total_count, total_bytes, + hash: ShardHashType = "murmurhash3_x86_128", + gzip_compress=True): + if total_count ==1: + return None + + options = { + '@type': 'neuroglancer_uint64_sharded_v1', + 'hash': hash, + } + + total_minishard_bits = 0 + while (total_count >> total_minishard_bits) > MINISHARD_TARGET_COUNT: + total_minishard_bits += 1 + + shard_bits = 0 + while (total_bytes >> shard_bits) > SHARD_TARGET_SIZE: + shard_bits += 1 + + preshift_bits = 0 + while MINISHARD_TARGET_COUNT >> preshift_bits: + preshift_bits += 1 + + options['preshift_bits'] = preshift_bits + options['shard_bits'] = shard_bits + options['minishard_bits'] = total_minishard_bits - min(total_minishard_bits, shard_bits) + if gzip_compress: + options['data_encoding'] = 'gzip' + options['minishard_index_encoding'] = 'gzip' + else: + options['data_encoding'] = 'raw' + options['minishard_index_encoding'] = 'raw' + + # options.setdefault('minishard_index_compression', {}).setdefault('gzip_compression', {})['level'] = minishard_index_compression + # options.setdefault('data_compression', {}).setdefault('gzip_compression', {})['level'] = data_compression + + return options + def _get_dtype_for_geometry(annotation_type: AnnotationType, rank: int): geometry_size = rank if annotation_type == "point" else 2 * rank return [("geometry", " Date: Wed, 31 Jan 2024 10:45:13 -0800 Subject: [PATCH 04/48] formatting fixes --- python/neuroglancer/write_annotations.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 6b4e4cdd5..92b78d0a8 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -67,14 +67,14 @@ class Annotation(NamedTuple): def choose_output_spec(total_count, total_bytes, - hash: ShardHashType = "murmurhash3_x86_128", + hashtype: ShardHashType = "murmurhash3_x86_128", gzip_compress=True): if total_count ==1: return None options = { '@type': 'neuroglancer_uint64_sharded_v1', - 'hash': hash, + 'hash': hashtype, } total_minishard_bits = 0 @@ -99,11 +99,9 @@ def choose_output_spec(total_count, total_bytes, options['data_encoding'] = 'raw' options['minishard_index_encoding'] = 'raw' - # options.setdefault('minishard_index_compression', {}).setdefault('gzip_compression', {})['level'] = minishard_index_compression - # options.setdefault('data_compression', {}).setdefault('gzip_compression', {})['level'] = data_compression - return options + def _get_dtype_for_geometry(annotation_type: AnnotationType, rank: int): geometry_size = rank if annotation_type == "point" else 2 * rank return [("geometry", " Date: Thu, 1 Feb 2024 11:05:50 -0800 Subject: [PATCH 05/48] spatial sharding WIP --- python/neuroglancer/write_annotations.py | 192 ++++++++++++++++++----- 1 file changed, 154 insertions(+), 38 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 92b78d0a8..c00cd82f3 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -22,9 +22,9 @@ import struct from collections.abc import Sequence from typing import Literal, NamedTuple, Optional, Union, cast 
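# A rough worked example of the shard-parameter heuristic in choose_output_spec
# above (illustrative numbers; MINISHARD_TARGET_COUNT = 1000 and
# SHARD_TARGET_SIZE = 50000000 as defined in patch 03). For
# total_count = 4_000_000 annotations occupying total_bytes = 400_000_000:
#   total_minishard_bits == 12   # 4_000_000 >> 12 == 976, first value <= 1000
#   shard_bits           == 3    # 400_000_000 >> 3 == 50_000_000, not > target
#   preshift_bits        == 10   # 1000 >> 10 == 0 ends the loop
#   minishard_bits       == 12 - min(12, 3) == 9
# i.e. 2**3 shard files, each holding 2**9 minishards of roughly 1000
# annotations apiece.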
-from logging import warning import tensorstore as ts import numpy as np +import math from . import coordinate_space, viewer_state @@ -102,6 +102,55 @@ def choose_output_spec(total_count, total_bytes, return options +def compressed_morton_code(gridpt, grid_size): + # from cloudvolume + if hasattr(gridpt, "__len__") and len(gridpt) == 0: # generators don't have len + return np.zeros((0,), dtype=np.uint32) + + gridpt = np.asarray(gridpt, dtype=np.uint32) + single_input = False + if gridpt.ndim == 1: + gridpt = np.atleast_2d(gridpt) + single_input = True + + code = np.zeros((gridpt.shape[0],), dtype=np.uint64) + num_bits = [ math.ceil(math.log2(size)) for size in grid_size ] + j = np.uint64(0) + one = np.uint64(1) + + if sum(num_bits) > 64: + raise ValueError(f"Unable to represent grids that require more than 64 bits. Grid size {grid_size} requires {num_bits} bits.") + + max_coords = np.max(gridpt, axis=0) + if np.any(max_coords >= grid_size): + raise ValueError(f"Unable to represent grid points larger than the grid. Grid size: {grid_size} Grid points: {gridpt}") + + for i in range(max(num_bits)): + for dim in range(3): + if 2 ** i < grid_size[dim]: + bit = (((np.uint64(gridpt[:, dim]) >> np.uint64(i)) & one) << j) + code |= bit + j += one + print(gridpt, grid_size, code) + if single_input: + return code[0] + return code + +# def compressed_morton_code(position, shape): +# output_bit = 0 +# rank = len(position) +# output_num = 0 +# for bit in range(32): +# for dim in range(rank-1, -1, -1): +# if (shape[dim] - 1) >> bit: +# output_num |= ((position[dim] >> bit) & 1) << output_bit +# output_bit += 1 +# if output_bit == 64: +# # In Python, we don't have the 32-bit limitation, so we don't need to split into high and low. +# # But you can add code here to handle or signal overflow if needed. +# pass +# return output_num + def _get_dtype_for_geometry(annotation_type: AnnotationType, rank: int): geometry_size = rank if annotation_type == "point" else 2 * rank return [("geometry", " Date: Fri, 2 Feb 2024 07:24:26 -0800 Subject: [PATCH 06/48] fix big endian encoding of spatial index and description --- python/neuroglancer/write_annotations.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index c00cd82f3..29cd95187 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -1,17 +1,15 @@ """Writes annotations in the Precomputed annotation format. This provides a simple way to write annotations in the precomputed format, but -has a number of limitations that makes it suitable only for a relatively small -amount of annotation data: +has a number of limitations that makes it suitable only for writing +up to a few million of annotations, and not beyond that. - All annotations are buffered in memory. -- Only a trivial spatial index consisting of a single grid cell at a single - level is generated. Consequently, Neuroglancer will be forced to download all - annotations at once. +- Only a single spatial index of a fixed grid size is generated. + No downsampling is performed. Consequently, Neuroglancer will be forced + to download all annotations to render them in 3 dimensions. -- All indices are written in the unsharded format. Consequently, there is at - least one file written per annotation. 
""" from collections import defaultdict @@ -131,7 +129,6 @@ def compressed_morton_code(gridpt, grid_size): bit = (((np.uint64(gridpt[:, dim]) >> np.uint64(i)) & one) << j) code |= bit j += one - print(gridpt, grid_size, code) if single_input: return code[0] return code @@ -404,9 +401,9 @@ def _serialize_annotation_chunk_sharded(self, path, annotations_by_chunk, shard_ for chunk_index, annotations in annotations_by_chunk.items(): # calculate the compressed morton code for the chunk index key = compressed_morton_code(chunk_index, max_sizes) - print(key, type(key)) - key = key.astype(' Date: Fri, 2 Feb 2024 07:43:28 -0800 Subject: [PATCH 07/48] formatting fixes --- python/neuroglancer/write_annotations.py | 142 ++++++++++------------- 1 file changed, 64 insertions(+), 78 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 29cd95187..6ae811b23 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -6,7 +6,7 @@ - All annotations are buffered in memory. -- Only a single spatial index of a fixed grid size is generated. +- Only a single spatial index of a fixed grid size is generated. No downsampling is performed. Consequently, Neuroglancer will be forced to download all annotations to render them in 3 dimensions. @@ -22,20 +22,25 @@ from typing import Literal, NamedTuple, Optional, Union, cast import tensorstore as ts import numpy as np -import math from . import coordinate_space, viewer_state + class NumpyEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.ndarray): - return obj.tolist() - if isinstance(obj, np.integer): - return int(obj) - if isinstance(obj, np.floating): - return float(obj) - return json.JSONEncoder.default(self, obj) - + """Special json encoder for numpy types. + + Args: + json (dict): A dictionary to be encoded. + """ + def default(self, o): + if isinstance(o, np.ndarray): + return o.tolist() + if isinstance(o, np.integer): + return int(o) + if isinstance(o, np.floating): + return float(o) + return json.JSONEncoder.default(self, o) + class Annotation(NamedTuple): id: int @@ -58,18 +63,23 @@ class Annotation(NamedTuple): } AnnotationType = Literal["point", "line", "axis_aligned_bounding_box", "ellipsoid"] -ShardHashType = Literal["murmurhash3_x86_128", "identity_hash"] MINISHARD_TARGET_COUNT = 1000 SHARD_TARGET_SIZE = 50000000 def choose_output_spec(total_count, total_bytes, - hashtype: ShardHashType = "murmurhash3_x86_128", - gzip_compress=True): - if total_count ==1: + hashtype: str = "murmurhash3_x86_128", + gzip_compress=True): + if total_count == 1: return None - + + # test if hashtype is valid + if hashtype not in ["murmurhash3_x86_128", "identity_hash"]: + raise ValueError(f"Invalid hashtype {hashtype}." 
+ "Must be one of 'murmurhash3_x86_128' " + "or 'identity_hash'") + options = { '@type': 'neuroglancer_uint64_sharded_v1', 'hash': hashtype, @@ -100,53 +110,30 @@ def choose_output_spec(total_count, total_bytes, return options -def compressed_morton_code(gridpt, grid_size): - # from cloudvolume - if hasattr(gridpt, "__len__") and len(gridpt) == 0: # generators don't have len - return np.zeros((0,), dtype=np.uint32) - - gridpt = np.asarray(gridpt, dtype=np.uint32) - single_input = False - if gridpt.ndim == 1: - gridpt = np.atleast_2d(gridpt) - single_input = True - - code = np.zeros((gridpt.shape[0],), dtype=np.uint64) - num_bits = [ math.ceil(math.log2(size)) for size in grid_size ] - j = np.uint64(0) - one = np.uint64(1) - - if sum(num_bits) > 64: - raise ValueError(f"Unable to represent grids that require more than 64 bits. Grid size {grid_size} requires {num_bits} bits.") - - max_coords = np.max(gridpt, axis=0) - if np.any(max_coords >= grid_size): - raise ValueError(f"Unable to represent grid points larger than the grid. Grid size: {grid_size} Grid points: {gridpt}") - - for i in range(max(num_bits)): - for dim in range(3): - if 2 ** i < grid_size[dim]: - bit = (((np.uint64(gridpt[:, dim]) >> np.uint64(i)) & one) << j) - code |= bit - j += one - if single_input: - return code[0] - return code - -# def compressed_morton_code(position, shape): -# output_bit = 0 -# rank = len(position) -# output_num = 0 -# for bit in range(32): -# for dim in range(rank-1, -1, -1): -# if (shape[dim] - 1) >> bit: -# output_num |= ((position[dim] >> bit) & 1) << output_bit -# output_bit += 1 -# if output_bit == 64: -# # In Python, we don't have the 32-bit limitation, so we don't need to split into high and low. -# # But you can add code here to handle or signal overflow if needed. -# pass -# return output_num +def compressed_morton_code(position: Sequence[int], shape: Sequence[int]): + """Converts a position in a grid to a compressed Morton code. + + Args: + position: A sequence of integers representing the position in the grid. + shape: A sequence of integers representing the shape of the grid. + + Returns: + int: The compressed Morton code. + """ + output_bit = 0 + rank = len(position) + output_num = 0 + for bit in range(32): + for dim in range(rank-1, -1, -1): + if (shape[dim] - 1) >> bit: + output_num |= ((position[dim] >> bit) & 1) << output_bit + output_bit += 1 + if output_bit == 64: + # In Python, we don't have the 32-bit limitation, so we don't need to split into high and low. + # But you can add code here to handle or signal overflow if needed. 
+ pass + return output_num + def _get_dtype_for_geometry(annotation_type: AnnotationType, rank: int): geometry_size = rank if annotation_type == "point" else 2 * rank @@ -225,12 +212,11 @@ def __init__( annotation_type, coordinate_space.rank ) + _get_dtype_for_properties(self.properties) self.lower_bound = np.array(lower_bound, dtype=np.float32) - assert(len(self.lower_bound) == self.rank) + assert (len(self.lower_bound) == self.rank) self.upper_bound = np.full( shape=(self.rank,), fill_value=float("-inf"), dtype=np.float32 ) self.related_annotations = [{} for _ in self.relationships] - def get_chunk_index(self, coords): return tuple(((coords-self.lower_bound) // self.chunk_size).astype(np.int32)) @@ -245,7 +231,7 @@ def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): f"Expected point to have length {self.coordinate_space.rank}, but received: {len(point)}" ) - #self.lower_bound = np.minimum(self.lower_bound, point) + # self.lower_bound = np.minimum(self.lower_bound, point) self.upper_bound = np.maximum(self.upper_bound, point) self._add_obj(point, id, **kwargs) @@ -292,7 +278,7 @@ def _add_two_point_obj( f"Expected coordinates to have length {self.coordinate_space.rank}, but received: {len(point_b)}" ) - #self.lower_bound = np.minimum(self.lower_bound, point_a) + # self.lower_bound = np.minimum(self.lower_bound, point_a) self.upper_bound = np.maximum(self.upper_bound, point_b) coords = np.concatenate((point_a, point_b)) self._add_obj(cast(Sequence[float], coords), id, **kwargs) @@ -332,7 +318,7 @@ def _add_obj(self, coords: Sequence[float], id: Optional[int], **kwargs): rel_index = self.related_annotations[i] rel_index_list = rel_index.setdefault(segment_id, []) rel_index_list.append(annotation) - + def _serialize_annotations_sharded(self, path, annotations, shard_spec): spec = { 'driver': 'neuroglancer_uint64_sharded', @@ -344,12 +330,12 @@ def _serialize_annotations_sharded(self, path, annotations, shard_spec): for ann in annotations: # convert the ann.id to a binary representation of a uint64 key = ann.id.to_bytes(8, 'little') - dataset.with_transaction(txn)[key]=ann.encoded + dataset.with_transaction(txn)[key] = ann.encoded txn.commit_async().result() - + def _serialize_annotations(self, f, annotations: list[Annotation]): f.write(self._encode_multiple_annotations(annotations)) - + def _serialize_annotation(self, f, annotation: Annotation): f.write(annotation.encoded) for related_ids in annotation.relationships: @@ -374,7 +360,7 @@ def _encode_multiple_annotations(self, annotations: list[Annotation]): for annotation in annotations: binary_components.append(struct.pack(" Date: Fri, 2 Feb 2024 09:20:49 -0800 Subject: [PATCH 08/48] change shard spec to NamedTuple --- python/neuroglancer/write_annotations.py | 59 ++++++++++++++++-------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 6ae811b23..1cbd7080a 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -63,11 +63,31 @@ class Annotation(NamedTuple): } AnnotationType = Literal["point", "line", "axis_aligned_bounding_box", "ellipsoid"] - MINISHARD_TARGET_COUNT = 1000 SHARD_TARGET_SIZE = 50000000 +class ShardSpec(NamedTuple): + type: str + hash: Literal["murmurhash3_x86_128", "identity_hash"] + preshift_bits: int + shard_bits: int + minishard_bits: int + data_encoding: Literal["raw", "gzip"] + minishard_index_encoding: Literal["raw", "gzip"] + + def 
to_json(self): + return { + '@type': self.type, + 'hash': self.hash, + 'preshift_bits': self.preshift_bits, + 'shard_bits': self.shard_bits, + 'minishard_bits': self.minishard_bits, + 'data_encoding': str(self.data_encoding), + 'minishard_index_encoding': str(self.minishard_index_encoding) + } + + def choose_output_spec(total_count, total_bytes, hashtype: str = "murmurhash3_x86_128", gzip_compress=True): @@ -80,11 +100,6 @@ def choose_output_spec(total_count, total_bytes, "Must be one of 'murmurhash3_x86_128' " "or 'identity_hash'") - options = { - '@type': 'neuroglancer_uint64_sharded_v1', - 'hash': hashtype, - } - total_minishard_bits = 0 while (total_count >> total_minishard_bits) > MINISHARD_TARGET_COUNT: total_minishard_bits += 1 @@ -97,17 +112,21 @@ def choose_output_spec(total_count, total_bytes, while MINISHARD_TARGET_COUNT >> preshift_bits: preshift_bits += 1 - options['preshift_bits'] = preshift_bits - options['shard_bits'] = shard_bits - options['minishard_bits'] = total_minishard_bits - min(total_minishard_bits, shard_bits) + minishard_bits = total_minishard_bits - min(total_minishard_bits, shard_bits) if gzip_compress: - options['data_encoding'] = 'gzip' - options['minishard_index_encoding'] = 'gzip' + data_encoding = 'gzip' + minishard_index_encoding = 'gzip' else: - options['data_encoding'] = 'raw' - options['minishard_index_encoding'] = 'raw' + data_encoding = 'raw' + minishard_index_encoding = 'raw' - return options + return ShardSpec(type='neuroglancer_uint64_sharded_v1', + hash=hashtype, + preshift_bits=preshift_bits, + shard_bits=shard_bits, + minishard_bits=minishard_bits, + data_encoding=data_encoding, + minishard_index_encoding=minishard_index_encoding) def compressed_morton_code(position: Sequence[int], shape: Sequence[int]): @@ -322,7 +341,7 @@ def _add_obj(self, coords: Sequence[float], id: Optional[int], **kwargs): def _serialize_annotations_sharded(self, path, annotations, shard_spec): spec = { 'driver': 'neuroglancer_uint64_sharded', - 'metadata': shard_spec, + 'metadata': shard_spec.to_json(), "base": f"file://{path}" } dataset = ts.KvStore.open(spec).result() @@ -364,7 +383,7 @@ def _encode_multiple_annotations(self, annotations: list[Annotation]): def _serialize_annotations_by_related_id(self, path, related_id_dict, shard_spec): spec = { 'driver': 'neuroglancer_uint64_sharded', - 'metadata': shard_spec, + 'metadata': shard_spec.to_json(), "base": f"file://{path}" } dataset = ts.KvStore.open(spec).result() @@ -379,7 +398,7 @@ def _serialize_annotations_by_related_id(self, path, related_id_dict, shard_spec def _serialize_annotation_chunk_sharded(self, path, annotations_by_chunk, shard_spec, max_sizes): spec = { 'driver': 'neuroglancer_uint64_sharded', - 'metadata': shard_spec, + 'metadata': shard_spec.to_json(), "base": f"file://{path}" } dataset = ts.KvStore.open(spec).result() @@ -443,7 +462,7 @@ def write(self, path: Union[str, pathlib.Path]): self.annotations_by_chunk, spatial_sharding_spec, num_chunks.tolist()) - metadata['spatial'][0]['sharding'] = spatial_sharding_spec + metadata['spatial'][0]['sharding'] = spatial_sharding_spec.to_json() else: for chunk_index, annotations in self.annotations_by_chunk.items(): chunk_name = "_".join([str(c) for c in chunk_index]) @@ -454,7 +473,7 @@ def write(self, path: Union[str, pathlib.Path]): # write annotations by id if sharding_spec is not None: self._serialize_annotations_sharded(os.path.join(path, "by_id"), self.annotations, sharding_spec) - metadata["by_id"]["sharding"] = sharding_spec + 
metadata["by_id"]["sharding"] = sharding_spec.to_json() else: for annotation in self.annotations: with open(os.path.join(path, "by_id", str(annotation.id)), "wb") as f: @@ -468,7 +487,7 @@ def write(self, path: Union[str, pathlib.Path]): rel_md = {"id": relationship, "key": f"rel_{relationship}"} if relationship_sharding_spec is not None: - rel_md["sharding"] = relationship_sharding_spec + rel_md["sharding"] = relationship_sharding_spec.to_json() self._serialize_annotations_by_related_id(os.path.join(path, f"rel_{relationship}"), rel_index, relationship_sharding_spec) else: for segment_id, annotations in rel_index.items(): From 4e6ebb81481f3fea06329c1dd215385c509211d4 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 2 Feb 2024 10:26:01 -0800 Subject: [PATCH 09/48] make tensorstore import optional --- python/neuroglancer/write_annotations.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 1cbd7080a..038d5ade5 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -20,7 +20,12 @@ import struct from collections.abc import Sequence from typing import Literal, NamedTuple, Optional, Union, cast -import tensorstore as ts +import logging +try: + import tensorstore as ts +except ImportError: + logging.warning('Sharded write support requires tensorstore, Install with pip install tensorstore') + ts = None import numpy as np from . import coordinate_space, viewer_state @@ -93,6 +98,8 @@ def choose_output_spec(total_count, total_bytes, gzip_compress=True): if total_count == 1: return None + if ts is None: + return None # test if hashtype is valid if hashtype not in ["murmurhash3_x86_128", "identity_hash"]: From 29e9f754e65304d88d62f3b9dd78f77cfea2da56 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 3 Feb 2024 14:08:08 -0800 Subject: [PATCH 10/48] fixing typing errors --- python/neuroglancer/write_annotations.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 038d5ade5..b5649f037 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -94,7 +94,7 @@ def to_json(self): def choose_output_spec(total_count, total_bytes, - hashtype: str = "murmurhash3_x86_128", + hashtype: Literal["murmurhash3_x86_128", "identity_hash"] = "murmurhash3_x86_128", gzip_compress=True): if total_count == 1: return None @@ -121,11 +121,11 @@ def choose_output_spec(total_count, total_bytes, minishard_bits = total_minishard_bits - min(total_minishard_bits, shard_bits) if gzip_compress: - data_encoding = 'gzip' - minishard_index_encoding = 'gzip' + data_encoding: Literal["raw", "gzip"] = 'gzip' + minishard_index_encoding: Literal["raw", "gzip"] = 'gzip' else: - data_encoding = 'raw' - minishard_index_encoding = 'raw' + data_encoding: Literal["raw", "gzip"] = 'raw' + minishard_index_encoding: Literal["raw", "gzip"] = 'raw' return ShardSpec(type='neuroglancer_uint64_sharded_v1', hash=hashtype, @@ -506,4 +506,7 @@ def write(self, path: Union[str, pathlib.Path]): # write metadata info file with open(os.path.join(path, "info"), "w") as f: - f.write(json.dumps(metadata, cls=NumpyEncoder)) \ No newline at end of file + f.write(json.dumps(metadata, cls=NumpyEncoder)) + + + \ No newline at end of file From 6efd7bc235b5a60e8142317a7649382349ac282a Mon Sep 17 00:00:00 2001 From: Forrest Collman 
Date: Sat, 3 Feb 2024 14:12:10 -0800 Subject: [PATCH 11/48] fixing type hinting --- python/neuroglancer/write_annotations.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index b5649f037..7b8086f6e 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -19,7 +19,7 @@ import pathlib import struct from collections.abc import Sequence -from typing import Literal, NamedTuple, Optional, Union, cast +from typing import Literal, NamedTuple, Optional, Union, cast, List, DefaultDict import logging try: import tensorstore as ts @@ -120,12 +120,13 @@ def choose_output_spec(total_count, total_bytes, preshift_bits += 1 minishard_bits = total_minishard_bits - min(total_minishard_bits, shard_bits) + data_encoding: Literal["raw", "gzip"] = 'raw' + minishard_index_encoding: Literal["raw", "gzip"] = 'raw' + if gzip_compress: - data_encoding: Literal["raw", "gzip"] = 'gzip' - minishard_index_encoding: Literal["raw", "gzip"] = 'gzip' - else: - data_encoding: Literal["raw", "gzip"] = 'raw' - minishard_index_encoding: Literal["raw", "gzip"] = 'raw' + data_encoding = 'gzip' + minishard_index_encoding = 'gzip' + return ShardSpec(type='neuroglancer_uint64_sharded_v1', hash=hashtype, @@ -230,7 +231,7 @@ def __init__( self.relationships = list(relationships) self.annotation_type = annotation_type self.properties = list(properties) - self.annotations_by_chunk = defaultdict(list) + self.annotations_by_chunk: DefaultDict[str, List[Annotation]] = defaultdict(list) self.properties.sort(key=lambda p: -_PROPERTY_DTYPES[p.type][1]) self.annotations = [] self.rank = coordinate_space.rank From 8c9169a525235e4ce121d52af70675d3824ababb Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 3 Feb 2024 14:19:26 -0800 Subject: [PATCH 12/48] fixing lint issues --- python/neuroglancer/write_annotations.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 7b8086f6e..149838b63 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -13,18 +13,19 @@ """ from collections import defaultdict +from collections.abc import Sequence import json import numbers import os import pathlib import struct -from collections.abc import Sequence -from typing import Literal, NamedTuple, Optional, Union, cast, List, DefaultDict +from typing import Literal, NamedTuple, Optional, Union, cast import logging try: import tensorstore as ts except ImportError: - logging.warning('Sharded write support requires tensorstore, Install with pip install tensorstore') + logging.warning("Sharded write support requires tensorstore." 
+ "Install with pip install tensorstore") ts = None import numpy as np @@ -231,7 +232,7 @@ def __init__( self.relationships = list(relationships) self.annotation_type = annotation_type self.properties = list(properties) - self.annotations_by_chunk: DefaultDict[str, List[Annotation]] = defaultdict(list) + self.annotations_by_chunk: defaultdict[str, list[Annotation]] = defaultdict(list) self.properties.sort(key=lambda p: -_PROPERTY_DTYPES[p.type][1]) self.annotations = [] self.rank = coordinate_space.rank @@ -506,8 +507,5 @@ def write(self, path: Union[str, pathlib.Path]): metadata["relationships"].append(rel_md) # write metadata info file - with open(os.path.join(path, "info"), "w") as f: + with open(os.path.join(path, "info"), "w", encoding="utf-8") as f: f.write(json.dumps(metadata, cls=NumpyEncoder)) - - - \ No newline at end of file From 949f22b59443b0b7e92d44dfae79ec2ab2dc82dc Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 3 Feb 2024 14:20:12 -0800 Subject: [PATCH 13/48] fix more linting --- python/neuroglancer/write_annotations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 149838b63..542de759b 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -128,7 +128,6 @@ def choose_output_spec(total_count, total_bytes, data_encoding = 'gzip' minishard_index_encoding = 'gzip' - return ShardSpec(type='neuroglancer_uint64_sharded_v1', hash=hashtype, preshift_bits=preshift_bits, From fd77f409ce3608fc2d4ef20e48f2a9f82fc9b51a Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 3 Feb 2024 14:24:17 -0800 Subject: [PATCH 14/48] fixing import linting --- python/neuroglancer/write_annotations.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 542de759b..24d365c25 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -12,22 +12,23 @@ """ -from collections import defaultdict -from collections.abc import Sequence import json +import logging import numbers import os import pathlib import struct +from collections import defaultdict +from collections.abc import Sequence from typing import Literal, NamedTuple, Optional, Union, cast -import logging + +import numpy as np try: import tensorstore as ts except ImportError: logging.warning("Sharded write support requires tensorstore." "Install with pip install tensorstore") ts = None -import numpy as np from . 
import coordinate_space, viewer_state From 3bc1a6405f0df2f4fa7baebf6f92190893b9c263 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 3 Feb 2024 14:32:38 -0800 Subject: [PATCH 15/48] fix: numpy.cast removal --- python/examples/example.py | 5 +++-- python/examples/example_coordinate_arrays.py | 5 +++-- python/examples/example_coordinate_transform.py | 5 +++-- python/examples/example_local_volume_coordinate_arrays.py | 5 +++-- python/examples/example_signed_int.py | 5 +++-- python/examples/flood_filling_simulation.py | 5 +++-- python/examples/interactive_inference.py | 5 +++-- python/neuroglancer/downsample.py | 2 +- python/neuroglancer/local_volume.py | 4 ++-- testdata/generate_npy_examples.py | 8 ++++---- 10 files changed, 28 insertions(+), 21 deletions(-) diff --git a/python/examples/example.py b/python/examples/example.py index ce558cbc5..e90d84eb1 100755 --- a/python/examples/example.py +++ b/python/examples/example.py @@ -14,8 +14,9 @@ def add_example_layers(state): a[1, :, :, :] = np.abs(np.sin(4 * (iy + iz))) * 255 a[2, :, :, :] = np.abs(np.sin(4 * (ix + iz))) * 255 - b = np.cast[np.uint32]( - np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10) + b = np.asarray( + np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), + dtype=np.uint32 ) b = np.pad(b, 1, "constant") dimensions = neuroglancer.CoordinateSpace( diff --git a/python/examples/example_coordinate_arrays.py b/python/examples/example_coordinate_arrays.py index f80e0adb2..627627fbd 100755 --- a/python/examples/example_coordinate_arrays.py +++ b/python/examples/example_coordinate_arrays.py @@ -14,8 +14,9 @@ def add_example_layers(state): a[1, :, :, :] = np.abs(np.sin(4 * (iy + iz))) * 255 a[2, :, :, :] = np.abs(np.sin(4 * (ix + iz))) * 255 - b = np.cast[np.uint32]( - np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10) + b = np.asarray( + np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), + dtype=np.uint32 ) b = np.pad(b, 1, "constant") dimensions = neuroglancer.CoordinateSpace( diff --git a/python/examples/example_coordinate_transform.py b/python/examples/example_coordinate_transform.py index b2a228b0c..48bfb68e3 100644 --- a/python/examples/example_coordinate_transform.py +++ b/python/examples/example_coordinate_transform.py @@ -14,8 +14,9 @@ ix, iy, iz = np.meshgrid( *[np.linspace(0, 1, n) for n in [100, 100, 100]], indexing="ij" ) - data = np.cast[np.uint32]( - np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10) + data = np.asarray( + np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), + dtype=np.uint32 ) data = np.pad(data, 1, "constant") dimensions = neuroglancer.CoordinateSpace( diff --git a/python/examples/example_local_volume_coordinate_arrays.py b/python/examples/example_local_volume_coordinate_arrays.py index 2f646a2ba..33cf01505 100644 --- a/python/examples/example_local_volume_coordinate_arrays.py +++ b/python/examples/example_local_volume_coordinate_arrays.py @@ -14,8 +14,9 @@ def add_example_layers(state): a[1, :, :, :] = np.abs(np.sin(4 * (iy + iz))) * 255 a[2, :, :, :] = np.abs(np.sin(4 * (ix + iz))) * 255 - b = np.cast[np.uint32]( - np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10) + b = np.asarray( + np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), + dtype=np.uint32 ) b = np.pad(b, 1, "constant") dimensions = neuroglancer.CoordinateSpace( diff --git a/python/examples/example_signed_int.py 
b/python/examples/example_signed_int.py index c0f69ff2e..a9a1f5ba7 100644 --- a/python/examples/example_signed_int.py +++ b/python/examples/example_signed_int.py @@ -10,8 +10,9 @@ def add_example_layer(state): *[np.linspace(0, 1, n) for n in [100, 100, 100]], indexing="ij" ) b = ( - np.cast[np.int32]( - np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10) + np.asarray( + np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), + dtype=[np.int32] ) - 2 ) diff --git a/python/examples/flood_filling_simulation.py b/python/examples/flood_filling_simulation.py index cdb044fd7..4f87ee8f3 100755 --- a/python/examples/flood_filling_simulation.py +++ b/python/examples/flood_filling_simulation.py @@ -169,8 +169,9 @@ def process_pos(pos): enqueue(tuple(new_pos)) dist_transform = scipy.ndimage.morphology.distance_transform_edt(~mask) - inf_results[slice_expr] = 1 + np.cast[np.uint8]( - np.minimum(dist_transform, 5) / 5.0 * 254 + inf_results[slice_expr] = 1 + np.asarray( + np.minimum(dist_transform, 5) / 5.0 * 254, + dtype=np.uint8, ) self.viewer.defer_callback(update_view) diff --git a/python/examples/interactive_inference.py b/python/examples/interactive_inference.py index 5baac0641..ec7f89953 100755 --- a/python/examples/interactive_inference.py +++ b/python/examples/interactive_inference.py @@ -96,8 +96,9 @@ def _do_inference(self, action_state): boundary_mask[:-1, :, :] |= gt_data[:-1, :, :] != gt_data[1:, :, :] boundary_mask[1:, :, :] |= gt_data[:-1, :, :] != gt_data[1:, :, :] dist_transform = scipy.ndimage.morphology.distance_transform_edt(~boundary_mask) - self.inf_results[slice_expr] = 1 + np.cast[np.uint8]( - np.minimum(dist_transform, 5) / 5.0 * 254 + self.inf_results[slice_expr] = 1 + np.asarray( + np.minimum(dist_transform, 5) / 5.0 * 254, + np.uint8 ) self.inf_volume.invalidate() diff --git a/python/neuroglancer/downsample.py b/python/neuroglancer/downsample.py index ba6084f16..b143b8b6f 100644 --- a/python/neuroglancer/downsample.py +++ b/python/neuroglancer/downsample.py @@ -31,7 +31,7 @@ def downsample_with_averaging(array, factor): indexing_expr = tuple(np.s_[:s] for s in part.shape) temp[indexing_expr] += part counts[indexing_expr] += 1 - return np.cast[array.dtype](temp / counts) + return np.asarray(temp / counts, dtype=array.dtype) def downsample_with_striding(array, factor): diff --git a/python/neuroglancer/local_volume.py b/python/neuroglancer/local_volume.py index fe467791b..00944ac34 100644 --- a/python/neuroglancer/local_volume.py +++ b/python/neuroglancer/local_volume.py @@ -193,7 +193,7 @@ def get_encoded_subvolume(self, data_format, start, end, scale_key): or np.prod(downsample_factor) > self.max_downsampling ): raise ValueError("Invalid downsampling factor.") - downsampled_shape = np.cast[np.int64](np.ceil(self.shape / downsample_factor)) + downsampled_shape = np.asarray(np.ceil(self.shape / downsample_factor, dtype=np.int64)) if np.any(end < start) or np.any(start < 0) or np.any(end > downsampled_shape): raise ValueError("Out of bounds data request.") @@ -208,7 +208,7 @@ def get_encoded_subvolume(self, data_format, start, end, scale_key): ) subvol = np.array(self.data[indexing_expr], copy=False) if subvol.dtype == "float64": - subvol = np.cast[np.float32](subvol) + subvol = np.asarray(subvol, dtype=np.float32) if np.any(downsample_factor != 1): if self.volume_type == "image": diff --git a/testdata/generate_npy_examples.py b/testdata/generate_npy_examples.py index 387395d8c..4c2e7bb51 100755 --- a/testdata/generate_npy_examples.py 
+++ b/testdata/generate_npy_examples.py @@ -36,11 +36,11 @@ def write_array(array): np.save(name, new_array) array_for_json = array if dtype == np.uint64: - array_for_json = (np.cast[np.dtype(" Date: Sat, 3 Feb 2024 14:36:28 -0800 Subject: [PATCH 16/48] another import block fix --- python/neuroglancer/write_annotations.py | 172 +++++++++++++---------- 1 file changed, 100 insertions(+), 72 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 24d365c25..9e76490ca 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -23,11 +23,14 @@ from typing import Literal, NamedTuple, Optional, Union, cast import numpy as np + try: import tensorstore as ts except ImportError: - logging.warning("Sharded write support requires tensorstore." - "Install with pip install tensorstore") + logging.warning( + "Sharded write support requires tensorstore." + "Install with pip install tensorstore" + ) ts = None from . import coordinate_space, viewer_state @@ -39,6 +42,7 @@ class NumpyEncoder(json.JSONEncoder): Args: json (dict): A dictionary to be encoded. """ + def default(self, o): if isinstance(o, np.ndarray): return o.tolist() @@ -85,19 +89,22 @@ class ShardSpec(NamedTuple): def to_json(self): return { - '@type': self.type, - 'hash': self.hash, - 'preshift_bits': self.preshift_bits, - 'shard_bits': self.shard_bits, - 'minishard_bits': self.minishard_bits, - 'data_encoding': str(self.data_encoding), - 'minishard_index_encoding': str(self.minishard_index_encoding) + "@type": self.type, + "hash": self.hash, + "preshift_bits": self.preshift_bits, + "shard_bits": self.shard_bits, + "minishard_bits": self.minishard_bits, + "data_encoding": str(self.data_encoding), + "minishard_index_encoding": str(self.minishard_index_encoding), } -def choose_output_spec(total_count, total_bytes, - hashtype: Literal["murmurhash3_x86_128", "identity_hash"] = "murmurhash3_x86_128", - gzip_compress=True): +def choose_output_spec( + total_count, + total_bytes, + hashtype: Literal["murmurhash3_x86_128", "identity_hash"] = "murmurhash3_x86_128", + gzip_compress=True, +): if total_count == 1: return None if ts is None: @@ -105,9 +112,11 @@ def choose_output_spec(total_count, total_bytes, # test if hashtype is valid if hashtype not in ["murmurhash3_x86_128", "identity_hash"]: - raise ValueError(f"Invalid hashtype {hashtype}." - "Must be one of 'murmurhash3_x86_128' " - "or 'identity_hash'") + raise ValueError( + f"Invalid hashtype {hashtype}." 
+ "Must be one of 'murmurhash3_x86_128' " + "or 'identity_hash'" + ) total_minishard_bits = 0 while (total_count >> total_minishard_bits) > MINISHARD_TARGET_COUNT: @@ -122,20 +131,22 @@ def choose_output_spec(total_count, total_bytes, preshift_bits += 1 minishard_bits = total_minishard_bits - min(total_minishard_bits, shard_bits) - data_encoding: Literal["raw", "gzip"] = 'raw' - minishard_index_encoding: Literal["raw", "gzip"] = 'raw' + data_encoding: Literal["raw", "gzip"] = "raw" + minishard_index_encoding: Literal["raw", "gzip"] = "raw" if gzip_compress: - data_encoding = 'gzip' - minishard_index_encoding = 'gzip' + data_encoding = "gzip" + minishard_index_encoding = "gzip" - return ShardSpec(type='neuroglancer_uint64_sharded_v1', - hash=hashtype, - preshift_bits=preshift_bits, - shard_bits=shard_bits, - minishard_bits=minishard_bits, - data_encoding=data_encoding, - minishard_index_encoding=minishard_index_encoding) + return ShardSpec( + type="neuroglancer_uint64_sharded_v1", + hash=hashtype, + preshift_bits=preshift_bits, + shard_bits=shard_bits, + minishard_bits=minishard_bits, + data_encoding=data_encoding, + minishard_index_encoding=minishard_index_encoding, + ) def compressed_morton_code(position: Sequence[int], shape: Sequence[int]): @@ -152,7 +163,7 @@ def compressed_morton_code(position: Sequence[int], shape: Sequence[int]): rank = len(position) output_num = 0 for bit in range(32): - for dim in range(rank-1, -1, -1): + for dim in range(rank - 1, -1, -1): if (shape[dim] - 1) >> bit: output_num |= ((position[dim] >> bit) & 1) << output_bit output_bit += 1 @@ -203,7 +214,7 @@ def __init__( lower_bound: Sequence = (0, 0, 0), relationships: Sequence[str] = (), properties: Sequence[viewer_state.AnnotationPropertySpec] = (), - chunk_size: Sequence[int] = [256, 256, 256] + chunk_size: Sequence[int] = [256, 256, 256], ): """Initializes an `AnnotationWriter`. 
@@ -232,7 +243,9 @@ def __init__( self.relationships = list(relationships) self.annotation_type = annotation_type self.properties = list(properties) - self.annotations_by_chunk: defaultdict[str, list[Annotation]] = defaultdict(list) + self.annotations_by_chunk: defaultdict[str, list[Annotation]] = defaultdict( + list + ) self.properties.sort(key=lambda p: -_PROPERTY_DTYPES[p.type][1]) self.annotations = [] self.rank = coordinate_space.rank @@ -240,14 +253,14 @@ def __init__( annotation_type, coordinate_space.rank ) + _get_dtype_for_properties(self.properties) self.lower_bound = np.array(lower_bound, dtype=np.float32) - assert (len(self.lower_bound) == self.rank) + assert len(self.lower_bound) == self.rank self.upper_bound = np.full( shape=(self.rank,), fill_value=float("-inf"), dtype=np.float32 ) self.related_annotations = [{} for _ in self.relationships] def get_chunk_index(self, coords): - return tuple(((coords-self.lower_bound) // self.chunk_size).astype(np.int32)) + return tuple(((coords - self.lower_bound) // self.chunk_size).astype(np.int32)) def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): if self.annotation_type != "point": @@ -338,7 +351,7 @@ def _add_obj(self, coords: Sequence[float], id: Optional[int], **kwargs): id=id, encoded=encoded.tobytes(), relationships=related_ids ) - chunk_index = self.get_chunk_index(np.array(coords[:self.rank])) + chunk_index = self.get_chunk_index(np.array(coords[: self.rank])) self.annotations_by_chunk[chunk_index].append(annotation) self.annotations.append(annotation) for i, segment_ids in enumerate(related_ids): @@ -349,15 +362,15 @@ def _add_obj(self, coords: Sequence[float], id: Optional[int], **kwargs): def _serialize_annotations_sharded(self, path, annotations, shard_spec): spec = { - 'driver': 'neuroglancer_uint64_sharded', - 'metadata': shard_spec.to_json(), - "base": f"file://{path}" - } + "driver": "neuroglancer_uint64_sharded", + "metadata": shard_spec.to_json(), + "base": f"file://{path}", + } dataset = ts.KvStore.open(spec).result() txn = ts.Transaction() for ann in annotations: # convert the ann.id to a binary representation of a uint64 - key = ann.id.to_bytes(8, 'little') + key = ann.id.to_bytes(8, "little") dataset.with_transaction(txn)[key] = ann.encoded txn.commit_async().result() @@ -391,25 +404,27 @@ def _encode_multiple_annotations(self, annotations: list[Annotation]): def _serialize_annotations_by_related_id(self, path, related_id_dict, shard_spec): spec = { - 'driver': 'neuroglancer_uint64_sharded', - 'metadata': shard_spec.to_json(), - "base": f"file://{path}" - } + "driver": "neuroglancer_uint64_sharded", + "metadata": shard_spec.to_json(), + "base": f"file://{path}", + } dataset = ts.KvStore.open(spec).result() txn = ts.Transaction() for related_id, annotations in related_id_dict.items(): # convert the ann.id to a binary representation of a uint64 - key = related_id.to_bytes(8, 'little') + key = related_id.to_bytes(8, "little") value = self._encode_multiple_annotations(annotations) dataset.with_transaction(txn)[key] = value txn.commit_async().result() - def _serialize_annotation_chunk_sharded(self, path, annotations_by_chunk, shard_spec, max_sizes): + def _serialize_annotation_chunk_sharded( + self, path, annotations_by_chunk, shard_spec, max_sizes + ): spec = { - 'driver': 'neuroglancer_uint64_sharded', - 'metadata': shard_spec.to_json(), - "base": f"file://{path}" - } + "driver": "neuroglancer_uint64_sharded", + "metadata": shard_spec.to_json(), + "base": f"file://{path}", + } dataset = 
ts.KvStore.open(spec).result() txn = ts.Transaction() for chunk_index, annotations in annotations_by_chunk.items(): @@ -417,7 +432,7 @@ def _serialize_annotation_chunk_sharded(self, path, annotations_by_chunk, shard_ key = compressed_morton_code(chunk_index, max_sizes) # convert the np.uint64 to a binary representation of a uint64 # using big endian representation - key = np.ascontiguousarray(key, dtype='>u8').tobytes() + key = np.ascontiguousarray(key, dtype=">u8").tobytes() value = self._encode_multiple_annotations(annotations) dataset.with_transaction(txn)[key] = value @@ -432,19 +447,20 @@ def write(self, path: Union[str, pathlib.Path]): "annotation_type": self.annotation_type, "properties": [p.to_json() for p in self.properties], "relationships": [], - "by_id": { - "key": "by_id" - } + "by_id": {"key": "by_id"}, } total_ann_bytes = sum(len(a.encoded) for a in self.annotations) - sharding_spec = choose_output_spec(len(self.annotations), - total_ann_bytes) + sharding_spec = choose_output_spec(len(self.annotations), total_ann_bytes) # calculate the number of chunks in each dimension - num_chunks = np.ceil((self.upper_bound - self.lower_bound) / self.chunk_size).astype(int) + num_chunks = np.ceil( + (self.upper_bound - self.lower_bound) / self.chunk_size + ).astype(int) # find the maximum number of annotations in any chunk - max_annotations = max(len(annotations) for annotations in self.annotations_by_chunk.values()) + max_annotations = max( + len(annotations) for annotations in self.annotations_by_chunk.values() + ) # make directories os.makedirs(path, exist_ok=True) @@ -454,34 +470,39 @@ def write(self, path: Union[str, pathlib.Path]): os.makedirs(os.path.join(path, "spatial0"), exist_ok=True) total_chunks = len(self.annotations_by_chunk) - spatial_sharding_spec = choose_output_spec(total_chunks, - total_ann_bytes + 8*len(self.annotations)+8*total_chunks) + spatial_sharding_spec = choose_output_spec( + total_chunks, total_ann_bytes + 8 * len(self.annotations) + 8 * total_chunks + ) # initialize metadata for spatial index - metadata['spatial'] = [ + metadata["spatial"] = [ { "key": "spatial0", "grid_shape": num_chunks.tolist(), "chunk_size": [int(x) for x in self.chunk_size], - "limit": max_annotations + "limit": max_annotations, } ] # write annotations by spatial chunk if spatial_sharding_spec is not None: - self._serialize_annotation_chunk_sharded(os.path.join(path, "spatial0"), - self.annotations_by_chunk, - spatial_sharding_spec, - num_chunks.tolist()) - metadata['spatial'][0]['sharding'] = spatial_sharding_spec.to_json() + self._serialize_annotation_chunk_sharded( + os.path.join(path, "spatial0"), + self.annotations_by_chunk, + spatial_sharding_spec, + num_chunks.tolist(), + ) + metadata["spatial"][0]["sharding"] = spatial_sharding_spec.to_json() else: for chunk_index, annotations in self.annotations_by_chunk.items(): chunk_name = "_".join([str(c) for c in chunk_index]) filepath = os.path.join(path, "spatial0", chunk_name) - with open(filepath, 'wb') as f: + with open(filepath, "wb") as f: self._serialize_annotations(f, annotations) # write annotations by id if sharding_spec is not None: - self._serialize_annotations_sharded(os.path.join(path, "by_id"), self.annotations, sharding_spec) + self._serialize_annotations_sharded( + os.path.join(path, "by_id"), self.annotations, sharding_spec + ) metadata["by_id"]["sharding"] = sharding_spec.to_json() else: for annotation in self.annotations: @@ -491,16 +512,23 @@ def write(self, path: Union[str, pathlib.Path]): # write relationships for 
i, relationship in enumerate(self.relationships): rel_index = self.related_annotations[i] - relationship_sharding_spec = choose_output_spec(len(rel_index), - total_ann_bytes + 8*len(self.annotations)+8*total_chunks) - rel_md = {"id": relationship, - "key": f"rel_{relationship}"} + relationship_sharding_spec = choose_output_spec( + len(rel_index), + total_ann_bytes + 8 * len(self.annotations) + 8 * total_chunks, + ) + rel_md = {"id": relationship, "key": f"rel_{relationship}"} if relationship_sharding_spec is not None: rel_md["sharding"] = relationship_sharding_spec.to_json() - self._serialize_annotations_by_related_id(os.path.join(path, f"rel_{relationship}"), rel_index, relationship_sharding_spec) + self._serialize_annotations_by_related_id( + os.path.join(path, f"rel_{relationship}"), + rel_index, + relationship_sharding_spec, + ) else: for segment_id, annotations in rel_index.items(): - filepath = os.path.join(path, f"rel_{relationship}", str(segment_id)) + filepath = os.path.join( + path, f"rel_{relationship}", str(segment_id) + ) with open(filepath, "wb") as f: self._serialize_annotations(f, annotations) From cd1e0ddda1ada2b29aff041a80c2bbe7dd5062d7 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 4 Feb 2024 12:06:34 -0800 Subject: [PATCH 17/48] ruff formatting --- python/examples/example.py | 2 +- python/examples/example_coordinate_arrays.py | 2 +- .../examples/example_coordinate_transform.py | 2 +- .../example_local_volume_coordinate_arrays.py | 2 +- python/examples/example_signed_int.py | 2 +- python/examples/interactive_inference.py | 3 +- python/neuroglancer/annotation_reader.py | 89 +++++++++++++++++++ python/neuroglancer/local_volume.py | 4 +- 8 files changed, 98 insertions(+), 8 deletions(-) create mode 100644 python/neuroglancer/annotation_reader.py diff --git a/python/examples/example.py b/python/examples/example.py index e90d84eb1..b4480e6e8 100755 --- a/python/examples/example.py +++ b/python/examples/example.py @@ -16,7 +16,7 @@ def add_example_layers(state): b = np.asarray( np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), - dtype=np.uint32 + dtype=np.uint32, ) b = np.pad(b, 1, "constant") dimensions = neuroglancer.CoordinateSpace( diff --git a/python/examples/example_coordinate_arrays.py b/python/examples/example_coordinate_arrays.py index 627627fbd..9302a1d0d 100755 --- a/python/examples/example_coordinate_arrays.py +++ b/python/examples/example_coordinate_arrays.py @@ -16,7 +16,7 @@ def add_example_layers(state): b = np.asarray( np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), - dtype=np.uint32 + dtype=np.uint32, ) b = np.pad(b, 1, "constant") dimensions = neuroglancer.CoordinateSpace( diff --git a/python/examples/example_coordinate_transform.py b/python/examples/example_coordinate_transform.py index 48bfb68e3..5422ac824 100644 --- a/python/examples/example_coordinate_transform.py +++ b/python/examples/example_coordinate_transform.py @@ -16,7 +16,7 @@ ) data = np.asarray( np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), - dtype=np.uint32 + dtype=np.uint32, ) data = np.pad(data, 1, "constant") dimensions = neuroglancer.CoordinateSpace( diff --git a/python/examples/example_local_volume_coordinate_arrays.py b/python/examples/example_local_volume_coordinate_arrays.py index 33cf01505..a1aac08f8 100644 --- a/python/examples/example_local_volume_coordinate_arrays.py +++ b/python/examples/example_local_volume_coordinate_arrays.py @@ -16,7 +16,7 @@ def add_example_layers(state): b = 
np.asarray( np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), - dtype=np.uint32 + dtype=np.uint32, ) b = np.pad(b, 1, "constant") dimensions = neuroglancer.CoordinateSpace( diff --git a/python/examples/example_signed_int.py b/python/examples/example_signed_int.py index a9a1f5ba7..be030e65d 100644 --- a/python/examples/example_signed_int.py +++ b/python/examples/example_signed_int.py @@ -12,7 +12,7 @@ def add_example_layer(state): b = ( np.asarray( np.floor(np.sqrt((ix - 0.5) ** 2 + (iy - 0.5) ** 2 + (iz - 0.5) ** 2) * 10), - dtype=[np.int32] + dtype=[np.int32], ) - 2 ) diff --git a/python/examples/interactive_inference.py b/python/examples/interactive_inference.py index ec7f89953..5793eac58 100755 --- a/python/examples/interactive_inference.py +++ b/python/examples/interactive_inference.py @@ -97,8 +97,7 @@ def _do_inference(self, action_state): boundary_mask[1:, :, :] |= gt_data[:-1, :, :] != gt_data[1:, :, :] dist_transform = scipy.ndimage.morphology.distance_transform_edt(~boundary_mask) self.inf_results[slice_expr] = 1 + np.asarray( - np.minimum(dist_transform, 5) / 5.0 * 254, - np.uint8 + np.minimum(dist_transform, 5) / 5.0 * 254, np.uint8 ) self.inf_volume.invalidate() diff --git a/python/neuroglancer/annotation_reader.py b/python/neuroglancer/annotation_reader.py new file mode 100644 index 000000000..0a761f1b2 --- /dev/null +++ b/python/neuroglancer/annotation_reader.py @@ -0,0 +1,89 @@ +from collections.abc import Sequence +import os +import json + + +class AnnotationReader: + def __init__(self, path): + self.path = path + self.annotations = [] + + # read the info file + self.metadata = self.read_info() + + self.type = self.metadata["annotation_type"] + + def read_info(self): + with open(os.path.join(self.path, "info"), "r") as f: + metadata = json.load(f) + return metadata + + def _decode_annotations(self, annotations): + """ + This function decodes the binary string of annotations into a list of annotation objects. + + Parameters: + annotations (bytes): Binary string of annotations. + + Returns: + list: List of annotation objects. Each object has 'id' and 'encoded' attributes. 
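+
+        The byte layout is assumed to mirror the writer's
+        _encode_multiple_annotations: a little-endian uint64 annotation
+        count followed by the encoded annotation records.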
+ """ + num_annotations = struct.unpack(" self.max_downsampling ): raise ValueError("Invalid downsampling factor.") - downsampled_shape = np.asarray(np.ceil(self.shape / downsample_factor, dtype=np.int64)) + downsampled_shape = np.asarray( + np.ceil(self.shape / downsample_factor, dtype=np.int64) + ) if np.any(end < start) or np.any(start < 0) or np.any(end > downsampled_shape): raise ValueError("Out of bounds data request.") From 5ce8665b0b398d6657d6f54e76da815e08afb86e Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 4 Feb 2024 12:29:20 -0800 Subject: [PATCH 18/48] feat: dynamic lower bounds --- python/neuroglancer/write_annotations.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 9e76490ca..902be2c97 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -211,7 +211,6 @@ def __init__( self, coordinate_space: coordinate_space.CoordinateSpace, annotation_type: AnnotationType, - lower_bound: Sequence = (0, 0, 0), relationships: Sequence[str] = (), properties: Sequence[viewer_state.AnnotationPropertySpec] = (), chunk_size: Sequence[int] = [256, 256, 256], @@ -252,15 +251,16 @@ def __init__( self.dtype = _get_dtype_for_geometry( annotation_type, coordinate_space.rank ) + _get_dtype_for_properties(self.properties) - self.lower_bound = np.array(lower_bound, dtype=np.float32) - assert len(self.lower_bound) == self.rank + self.lower_bound = np.full( + shape=(self.rank,), fill_value=float("inf"), dtype=np.float32 + ) self.upper_bound = np.full( shape=(self.rank,), fill_value=float("-inf"), dtype=np.float32 ) self.related_annotations = [{} for _ in self.relationships] def get_chunk_index(self, coords): - return tuple(((coords - self.lower_bound) // self.chunk_size).astype(np.int32)) + return tuple((coords // self.chunk_size).astype(np.int32)) def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): if self.annotation_type != "point": @@ -272,7 +272,7 @@ def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): f"Expected point to have length {self.coordinate_space.rank}, but received: {len(point)}" ) - # self.lower_bound = np.minimum(self.lower_bound, point) + self.lower_bound = np.minimum(self.lower_bound, point) self.upper_bound = np.maximum(self.upper_bound, point) self._add_obj(point, id, **kwargs) @@ -318,9 +318,11 @@ def _add_two_point_obj( raise ValueError( f"Expected coordinates to have length {self.coordinate_space.rank}, but received: {len(point_b)}" ) + min_vals = np.minimum(point_a, point_b) + max_vals = np.maximum(point_a, point_b) + self.lower_bound = np.minimum(self.lower_bound, min_vals) + self.upper_bound = np.maximum(self.upper_bound, max_vals) - # self.lower_bound = np.minimum(self.lower_bound, point_a) - self.upper_bound = np.maximum(self.upper_bound, point_b) coords = np.concatenate((point_a, point_b)) self._add_obj(cast(Sequence[float], coords), id, **kwargs) @@ -427,7 +429,10 @@ def _serialize_annotation_chunk_sharded( } dataset = ts.KvStore.open(spec).result() txn = ts.Transaction() + lower_chunk_index = self.get_chunk_index(self.lower_bound) + for chunk_index, annotations in annotations_by_chunk.items(): + chunk_index = np.array(chunk_index) - np.array(lower_chunk_index) # calculate the compressed morton code for the chunk index key = compressed_morton_code(chunk_index, max_sizes) # convert the np.uint64 to a binary representation of a 
uint64 @@ -479,7 +484,7 @@ def write(self, path: Union[str, pathlib.Path]): "key": "spatial0", "grid_shape": num_chunks.tolist(), "chunk_size": [int(x) for x in self.chunk_size], - "limit": max_annotations, + "limit": len(self.annotations), } ] # write annotations by spatial chunk From 8f08c24d22578c74576e2bc867b7a6bc7ef9a7e4 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 4 Feb 2024 12:35:57 -0800 Subject: [PATCH 19/48] removing annotation reader --- python/neuroglancer/annotation_reader.py | 89 ------------------------ 1 file changed, 89 deletions(-) delete mode 100644 python/neuroglancer/annotation_reader.py diff --git a/python/neuroglancer/annotation_reader.py b/python/neuroglancer/annotation_reader.py deleted file mode 100644 index 0a761f1b2..000000000 --- a/python/neuroglancer/annotation_reader.py +++ /dev/null @@ -1,89 +0,0 @@ -from collections.abc import Sequence -import os -import json - - -class AnnotationReader: - def __init__(self, path): - self.path = path - self.annotations = [] - - # read the info file - self.metadata = self.read_info() - - self.type = self.metadata["annotation_type"] - - def read_info(self): - with open(os.path.join(self.path, "info"), "r") as f: - metadata = json.load(f) - return metadata - - def _decode_annotations(self, annotations): - """ - This function decodes the binary string of annotations into a list of annotation objects. - - Parameters: - annotations (bytes): Binary string of annotations. - - Returns: - list: List of annotation objects. Each object has 'id' and 'encoded' attributes. - """ - num_annotations = struct.unpack(" Date: Sun, 4 Feb 2024 12:38:21 -0800 Subject: [PATCH 20/48] remove unused max annotations --- python/neuroglancer/write_annotations.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 902be2c97..e985b2dfc 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -462,11 +462,6 @@ def write(self, path: Union[str, pathlib.Path]): (self.upper_bound - self.lower_bound) / self.chunk_size ).astype(int) - # find the maximum number of annotations in any chunk - max_annotations = max( - len(annotations) for annotations in self.annotations_by_chunk.values() - ) - # make directories os.makedirs(path, exist_ok=True) for relationship in self.relationships: From e6211c04f77eb6cad66a99d459de7337310d34d5 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 4 Feb 2024 12:45:50 -0800 Subject: [PATCH 21/48] make chunk_size default dynamic to size of the coordinate system --- python/neuroglancer/write_annotations.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index e985b2dfc..05b5e4586 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -213,7 +213,7 @@ def __init__( annotation_type: AnnotationType, relationships: Sequence[str] = (), properties: Sequence[viewer_state.AnnotationPropertySpec] = (), - chunk_size: Sequence[int] = [256, 256, 256], + chunk_size: Union[int, Sequence[int]] = 256, ): """Initializes an `AnnotationWriter`. @@ -231,8 +231,9 @@ def __init__( is a dictionary with keys `"parent"` and `"child"`. properties: The properties of each annotation. Each property is a `AnnotationPropertySpec` object. - chunk_size: The size of each chunk in the spatial index. 
Must have the - same length as `coordinate_space.rank`. + chunk_size: The size of each chunk in the spatial index. + If an integer then all dimensions will be the same chunk size. + If a sequence, then must have the same length as `coordinate_space.rank`. write_id_sharded: If True, the annotations will be sharded by id. id_sharding_spec: The sharding specification for the id sharding. If not specified spec will be automatically configured @@ -251,6 +252,19 @@ def __init__( self.dtype = _get_dtype_for_geometry( annotation_type, coordinate_space.rank ) + _get_dtype_for_properties(self.properties) + + # if chunk_size is an integer, then make it a sequence + if isinstance(chunk_size, numbers.Integral): + self.chunk_size = np.full( + shape=(self.rank,), fill_value=chunk_size, dtype=np.int32 + ) + else: + if len(chunk_size) != self.rank: + raise ValueError( + f"Expected chunk_size to have length {self.rank}, but received: {len(chunk_size)}" + ) + self.chunk_size = np.array(chunk_size) + self.lower_bound = np.full( shape=(self.rank,), fill_value=float("inf"), dtype=np.float32 ) From 38eb5297143c3d36068ffdf8bbadbf2956fb16c9 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 4 Feb 2024 12:52:42 -0800 Subject: [PATCH 22/48] fixing mypy with cast --- python/neuroglancer/write_annotations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 05b5e4586..9bfb5eb80 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -259,6 +259,7 @@ def __init__( shape=(self.rank,), fill_value=chunk_size, dtype=np.int32 ) else: + chunk_size = cast(Sequence[int], chunk_size) if len(chunk_size) != self.rank: raise ValueError( f"Expected chunk_size to have length {self.rank}, but received: {len(chunk_size)}" From 073573b886e09733eb8e82df3c723363a64b9ca1 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 4 Feb 2024 14:57:55 -0800 Subject: [PATCH 23/48] mypy fix: value error print formatting --- python/neuroglancer/write_annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 9bfb5eb80..90509ba3d 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -262,7 +262,7 @@ def __init__( chunk_size = cast(Sequence[int], chunk_size) if len(chunk_size) != self.rank: raise ValueError( - f"Expected chunk_size to have length {self.rank}, but received: {len(chunk_size)}" + f"Expected chunk_size to have length {self.rank}, but received: {chunk_size}" ) self.chunk_size = np.array(chunk_size) From aece02deabe5871a0dce3a5a167bada724e06946 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 7 Feb 2024 07:10:40 -0800 Subject: [PATCH 24/48] fix doc string --- python/neuroglancer/write_annotations.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 90509ba3d..76acd998c 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -234,9 +234,6 @@ def __init__( chunk_size: The size of each chunk in the spatial index. If an integer then all dimensions will be the same chunk size. If a sequence, then must have the same length as `coordinate_space.rank`. - write_id_sharded: If True, the annotations will be sharded by id. - id_sharding_spec: The sharding specification for the id sharding. 
If - not specified spec will be automatically configured """ self.chunk_size = np.array(chunk_size) self.coordinate_space = coordinate_space From d8aa8584b0a6d1759431af9b741e2c48d62c66ea Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 7 Feb 2024 07:13:19 -0800 Subject: [PATCH 25/48] fix chunk_size typing --- python/neuroglancer/write_annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 76acd998c..3c512c8b8 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -213,7 +213,7 @@ def __init__( annotation_type: AnnotationType, relationships: Sequence[str] = (), properties: Sequence[viewer_state.AnnotationPropertySpec] = (), - chunk_size: Union[int, Sequence[int]] = 256, + chunk_size: Union[float, Sequence[float]] = 256, ): """Initializes an `AnnotationWriter`. From 34ac02af4c8da9a61c4b9db14cc7bd6008c94093 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 7 Feb 2024 11:28:14 -0800 Subject: [PATCH 26/48] renaming chunk size --- python/neuroglancer/write_annotations.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 3c512c8b8..2c3850d98 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -213,7 +213,7 @@ def __init__( annotation_type: AnnotationType, relationships: Sequence[str] = (), properties: Sequence[viewer_state.AnnotationPropertySpec] = (), - chunk_size: Union[float, Sequence[float]] = 256, + experimental_chunk_size: Union[float, Sequence[float]] = 256, ): """Initializes an `AnnotationWriter`. @@ -231,11 +231,14 @@ def __init__( is a dictionary with keys `"parent"` and `"child"`. properties: The properties of each annotation. Each property is a `AnnotationPropertySpec` object. - chunk_size: The size of each chunk in the spatial index. + experimental_chunk_size: The size of each chunk in the spatial index. If an integer then all dimensions will be the same chunk size. If a sequence, then must have the same length as `coordinate_space.rank`. + NOTE: it is anticipated that in the future downsampling will be added which + will start at a single top level chunk and move down, at which time this parameter + will be removed in favor of parameters that control downsampling. 
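+
+                A minimal illustrative sketch (assuming a 3-d coordinate
+                space; the names here are placeholders):
+
+                    writer = AnnotationWriter(
+                        coordinate_space=coordinate_space,
+                        annotation_type="point",
+                        experimental_chunk_size=[128, 128, 40],
+                    )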
""" - self.chunk_size = np.array(chunk_size) + self.coordinate_space = coordinate_space self.relationships = list(relationships) self.annotation_type = annotation_type @@ -251,15 +254,15 @@ def __init__( ) + _get_dtype_for_properties(self.properties) # if chunk_size is an integer, then make it a sequence - if isinstance(chunk_size, numbers.Integral): + if isinstance(experimental_chunk_size, numbers.Integral): self.chunk_size = np.full( - shape=(self.rank,), fill_value=chunk_size, dtype=np.int32 + shape=(self.rank,), fill_value=experimental_chunk_size, dtype=np.int32 ) else: - chunk_size = cast(Sequence[int], chunk_size) + chunk_size = cast(Sequence[int], experimental_chunk_size) if len(chunk_size) != self.rank: raise ValueError( - f"Expected chunk_size to have length {self.rank}, but received: {chunk_size}" + f"Expected experimental_chunk_size to have length {self.rank}, but received: {chunk_size}" ) self.chunk_size = np.array(chunk_size) From b15286ba59df5b8b2d029533b90144bef5fa19a3 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 7 Feb 2024 17:11:05 -0800 Subject: [PATCH 27/48] add better handling of multiple point annotations --- python/neuroglancer/write_annotations.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 2c3850d98..3f427a4ec 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -289,7 +289,7 @@ def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): self.lower_bound = np.minimum(self.lower_bound, point) self.upper_bound = np.maximum(self.upper_bound, point) - self._add_obj(point, id, **kwargs) + self._add_obj(point, id, 1, **kwargs) def add_axis_aligned_bounding_box( self, @@ -302,7 +302,7 @@ def add_axis_aligned_bounding_box( raise ValueError( f"Expected annotation type axis_aligned_bounding_box, but received: {self.annotation_type}" ) - self._add_two_point_obj(point_a, point_b, id, **kwargs) + self._add_two_point_obj(point_a, point_b, id, 2, **kwargs) def add_line( self, @@ -315,13 +315,14 @@ def add_line( raise ValueError( f"Expected annotation type line, but received: {self.annotation_type}" ) - self._add_two_point_obj(point_a, point_b, id, **kwargs) + self._add_two_point_obj(point_a, point_b, id, 2, **kwargs) def _add_two_point_obj( self, point_a: Sequence[float], point_b: Sequence[float], id: Optional[int] = None, + n_spatial_coords: Optional[int] = 2, **kwargs, ): if len(point_a) != self.coordinate_space.rank: @@ -339,9 +340,15 @@ def _add_two_point_obj( self.upper_bound = np.maximum(self.upper_bound, max_vals) coords = np.concatenate((point_a, point_b)) - self._add_obj(cast(Sequence[float], coords), id, **kwargs) + self._add_obj(cast(Sequence[float], coords), id, n_spatial_coords, **kwargs) - def _add_obj(self, coords: Sequence[float], id: Optional[int], **kwargs): + def _add_obj( + self, + coords: Sequence[float], + id: Optional[int], + n_spatial_coords: Optional[int] = 1, + **kwargs, + ): encoded = np.zeros(shape=(), dtype=self.dtype) encoded[()]["geometry"] = coords @@ -368,8 +375,11 @@ def _add_obj(self, coords: Sequence[float], id: Optional[int], **kwargs): id=id, encoded=encoded.tobytes(), relationships=related_ids ) - chunk_index = self.get_chunk_index(np.array(coords[: self.rank])) - self.annotations_by_chunk[chunk_index].append(annotation) + for i in range(n_spatial_coords): + chunk_index = self.get_chunk_index( + np.array(coords[i * 
self.rank: (i + 1) * self.rank]) + ) + self.annotations_by_chunk[chunk_index].append(annotation) self.annotations.append(annotation) for i, segment_ids in enumerate(related_ids): for segment_id in segment_ids: From d073bea142a487d2a1697ce715bc742a7c4606cd Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 7 Feb 2024 17:14:44 -0800 Subject: [PATCH 28/48] adding ellipsoid --- python/neuroglancer/write_annotations.py | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 3f427a4ec..83c586e80 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -304,6 +304,35 @@ def add_axis_aligned_bounding_box( ) self._add_two_point_obj(point_a, point_b, id, 2, **kwargs) + def add_ellipsoid( + self, + center: Sequence[float], + radii: Sequence[float], + id: Optional[int] = None, + **kwargs, + ): + if self.annotation_type != "ellipsoid": + raise ValueError( + f"Expected annotation type ellipsoid, but received: {self.annotation_type}" + ) + if len(center) != self.coordinate_space.rank: + raise ValueError( + f"Expected center to have length {self.coordinate_space.rank}, but received: {len(center)}" + ) + + if len(radii) != self.coordinate_space.rank: + raise ValueError( + f"Expected radii to have length {self.coordinate_space.rank}, but received: {len(radii)}" + ) + + min_vals = np.minimum(center - radii, center + radii) + max_vals = np.maximum(center - radii, center + radii) + self.lower_bound = np.minimum(self.lower_bound, min_vals) + self.upper_bound = np.maximum(self.upper_bound, max_vals) + + coords = np.concatenate((center, radii)) + self._add_obj(cast(Sequence[float], coords), id, 1, **kwargs) + def add_line( self, point_a: Sequence[float], From 75ba9445afe6fed7529177403dcf87146a5b1710 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 7 Feb 2024 17:20:12 -0800 Subject: [PATCH 29/48] fix ellipsoid logic --- python/neuroglancer/write_annotations.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 83c586e80..7ec8603fd 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -325,13 +325,9 @@ def add_ellipsoid( f"Expected radii to have length {self.coordinate_space.rank}, but received: {len(radii)}" ) - min_vals = np.minimum(center - radii, center + radii) - max_vals = np.maximum(center - radii, center + radii) - self.lower_bound = np.minimum(self.lower_bound, min_vals) - self.upper_bound = np.maximum(self.upper_bound, max_vals) - - coords = np.concatenate((center, radii)) - self._add_obj(cast(Sequence[float], coords), id, 1, **kwargs) + self.lower_bound = np.minimum(center, self.lower_bound) + self.upper_bound = np.maximum(center, self.upper_bound) + self._add_two_point_obj(center, radii, id, 1, **kwargs) def add_line( self, @@ -363,8 +359,15 @@ def _add_two_point_obj( raise ValueError( f"Expected coordinates to have length {self.coordinate_space.rank}, but received: {len(point_b)}" ) - min_vals = np.minimum(point_a, point_b) - max_vals = np.maximum(point_a, point_b) + if n_spatial_coords == 2: + min_vals = np.minimum(point_a, point_b) + max_vals = np.maximum(point_a, point_b) + elif n_spatial_coords == 1: + min_vals = point_a + max_vals = point_a + else: + raise ValueError(f"Unexpected n_spatial_coords {n_spatial_coords}") + self.lower_bound = 
np.minimum(self.lower_bound, min_vals) self.upper_bound = np.maximum(self.upper_bound, max_vals) @@ -406,7 +409,7 @@ def _add_obj( for i in range(n_spatial_coords): chunk_index = self.get_chunk_index( - np.array(coords[i * self.rank: (i + 1) * self.rank]) + np.array(coords[i * self.rank : (i + 1) * self.rank]) ) self.annotations_by_chunk[chunk_index].append(annotation) self.annotations.append(annotation) From a2c1932479bff2a95d3032784aa40c0c2f562ce4 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 7 Feb 2024 19:57:04 -0800 Subject: [PATCH 30/48] fixing typing --- python/neuroglancer/write_annotations.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 7ec8603fd..4c6cf62ed 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -20,7 +20,7 @@ import struct from collections import defaultdict from collections.abc import Sequence -from typing import Literal, NamedTuple, Optional, Union, cast +from typing import Literal, NamedTuple, Optional, SupportsInt, Union, cast import numpy as np @@ -363,8 +363,8 @@ def _add_two_point_obj( min_vals = np.minimum(point_a, point_b) max_vals = np.maximum(point_a, point_b) elif n_spatial_coords == 1: - min_vals = point_a - max_vals = point_a + min_vals = np.array(point_a) + max_vals = np.array(point_a) else: raise ValueError(f"Unexpected n_spatial_coords {n_spatial_coords}") @@ -378,7 +378,7 @@ def _add_obj( self, coords: Sequence[float], id: Optional[int], - n_spatial_coords: Optional[int] = 1, + n_spatial_coords: SupportsInt = 1, **kwargs, ): encoded = np.zeros(shape=(), dtype=self.dtype) @@ -407,9 +407,9 @@ def _add_obj( id=id, encoded=encoded.tobytes(), relationships=related_ids ) - for i in range(n_spatial_coords): + for i in range(int(n_spatial_coords)): chunk_index = self.get_chunk_index( - np.array(coords[i * self.rank : (i + 1) * self.rank]) + np.array(coords[i * self.rank: (i + 1) * self.rank]) ) self.annotations_by_chunk[chunk_index].append(annotation) self.annotations.append(annotation) From ed4761ada658a0cd6b9222c09f4e9d3453a8170a Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 7 Feb 2024 20:30:27 -0800 Subject: [PATCH 31/48] ruff formatting --- python/neuroglancer/write_annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 4c6cf62ed..ee8c17bc3 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -409,7 +409,7 @@ def _add_obj( for i in range(int(n_spatial_coords)): chunk_index = self.get_chunk_index( - np.array(coords[i * self.rank: (i + 1) * self.rank]) + np.array(coords[i * self.rank : (i + 1) * self.rank]) ) self.annotations_by_chunk[chunk_index].append(annotation) self.annotations.append(annotation) From bbabf64b36b1d3225edab793ed28dbf47cfc6b38 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 11 Feb 2024 15:04:04 -0800 Subject: [PATCH 32/48] fix: relationship key encoded incorrectly --- python/neuroglancer/write_annotations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index ee8c17bc3..d1728e6ea 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -470,8 +470,8 @@ def _serialize_annotations_by_related_id(self, path, 
related_id_dict, shard_spec dataset = ts.KvStore.open(spec).result() txn = ts.Transaction() for related_id, annotations in related_id_dict.items(): - # convert the ann.id to a binary representation of a uint64 - key = related_id.to_bytes(8, "little") + # convert the related_id to a binary representation of a uint64 + key = np.ascontiguousarray(related_id, dtype=">u8").tobytes() value = self._encode_multiple_annotations(annotations) dataset.with_transaction(txn)[key] = value txn.commit_async().result() From 524cca2941837baf0a63f7bc4988b737057d928f Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Tue, 13 Feb 2024 09:33:06 -0800 Subject: [PATCH 33/48] fixing dtypes of chunk size --- python/neuroglancer/write_annotations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index d1728e6ea..e11373573 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -254,12 +254,12 @@ def __init__( ) + _get_dtype_for_properties(self.properties) # if chunk_size is an integer, then make it a sequence - if isinstance(experimental_chunk_size, numbers.Integral): + if isinstance(experimental_chunk_size, numbers.Real): self.chunk_size = np.full( - shape=(self.rank,), fill_value=experimental_chunk_size, dtype=np.int32 + shape=(self.rank,), fill_value=experimental_chunk_size, dtype=np.float64 ) else: - chunk_size = cast(Sequence[int], experimental_chunk_size) + chunk_size = cast(Sequence[float], experimental_chunk_size) if len(chunk_size) != self.rank: raise ValueError( f"Expected experimental_chunk_size to have length {self.rank}, but received: {chunk_size}" From 5f80c3cbcd1de3132ca25fad6bff20bfce564686 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 14 Feb 2024 04:16:01 -0800 Subject: [PATCH 34/48] fix: missing related IDs in by_id index --- python/neuroglancer/write_annotations.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index e11373573..d17144199 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -419,7 +419,7 @@ def _add_obj( rel_index_list = rel_index.setdefault(segment_id, []) rel_index_list.append(annotation) - def _serialize_annotations_sharded(self, path, annotations, shard_spec): + def _serialize_annotations_sharded(self, path, annotations, shard_specs): spec = { "driver": "neuroglancer_uint64_sharded", "metadata": shard_spec.to_json(), @@ -430,7 +430,12 @@ def _serialize_annotations_sharded(self, path, annotations, shard_spec): for ann in annotations: # convert the ann.id to a binary representation of a uint64 key = ann.id.to_bytes(8, "little") - dataset.with_transaction(txn)[key] = ann.encoded + value = ann.encoded + for related_ids in ann.relationships: + value += struct.pack(" Date: Wed, 14 Feb 2024 04:17:19 -0800 Subject: [PATCH 35/48] fix: typo --- python/neuroglancer/write_annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index d17144199..9a803bf03 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -419,7 +419,7 @@ def _add_obj( rel_index_list = rel_index.setdefault(segment_id, []) rel_index_list.append(annotation) - def _serialize_annotations_sharded(self, path, annotations, shard_specs): + def 
_serialize_annotations_sharded(self, path, annotations, shard_spec): spec = { "driver": "neuroglancer_uint64_sharded", "metadata": shard_spec.to_json(), From ca9f066c7ffef9b7abfcb04f0e1388f504f2957a Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Wed, 14 Feb 2024 05:46:59 -0800 Subject: [PATCH 36/48] bugfix: byid endian encoding fix --- python/neuroglancer/write_annotations.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 9a803bf03..a2a1dcdde 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -429,12 +429,13 @@ def _serialize_annotations_sharded(self, path, annotations, shard_spec): txn = ts.Transaction() for ann in annotations: # convert the ann.id to a binary representation of a uint64 - key = ann.id.to_bytes(8, "little") + # key = ann.id.to_bytes(8, "little") + key = np.ascontiguousarray(ann.id, dtype=">u8").tobytes() value = ann.encoded for related_ids in ann.relationships: value += struct.pack(" Date: Wed, 14 Feb 2024 13:27:56 -0800 Subject: [PATCH 37/48] fixing np.asarray case --- python/neuroglancer/local_volume.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/neuroglancer/local_volume.py b/python/neuroglancer/local_volume.py index 987361ada..1acf5e3b2 100644 --- a/python/neuroglancer/local_volume.py +++ b/python/neuroglancer/local_volume.py @@ -194,7 +194,7 @@ def get_encoded_subvolume(self, data_format, start, end, scale_key): ): raise ValueError("Invalid downsampling factor.") downsampled_shape = np.asarray( - np.ceil(self.shape / downsample_factor, dtype=np.int64) + np.ceil(self.shape / downsample_factor), dtype=np.int64 ) if np.any(end < start) or np.any(start < 0) or np.any(end > downsampled_shape): raise ValueError("Out of bounds data request.") From fcd47a69a66e3f876c3c1840286703ba5ee072b2 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 16 Feb 2024 15:02:08 -0800 Subject: [PATCH 38/48] fix single layer chunks --- python/neuroglancer/write_annotations.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index a2a1dcdde..7220a7c99 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -521,9 +521,12 @@ def write(self, path: Union[str, pathlib.Path]): sharding_spec = choose_output_spec(len(self.annotations), total_ann_bytes) # calculate the number of chunks in each dimension - num_chunks = np.ceil( - (self.upper_bound - self.lower_bound) / self.chunk_size - ).astype(int) + num_chunks = np.max( + np.full(self.upper_bound.shape, 1, dtype=int), + np.ceil((self.upper_bound - self.lower_bound) / self.chunk_size).astype( + int + ), + ) # make directories os.makedirs(path, exist_ok=True) From 667d203b01c936433fd3791225ef725210effe28 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 16 Feb 2024 15:10:05 -0800 Subject: [PATCH 39/48] fixing chunk size for single planes --- python/neuroglancer/write_annotations.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 7220a7c99..af3fa94d5 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -521,12 +521,11 @@ def write(self, path: Union[str, pathlib.Path]): sharding_spec = 
choose_output_spec(len(self.annotations), total_ann_bytes) # calculate the number of chunks in each dimension - num_chunks = np.max( - np.full(self.upper_bound.shape, 1, dtype=int), - np.ceil((self.upper_bound - self.lower_bound) / self.chunk_size).astype( - int - ), - ) + num_chunks = np.ceil( + (self.upper_bound - self.lower_bound) / self.chunk_size + ).astype(int) + + np.maximum(num_chunks, np.full(num_chunks.shape, 1, dtype=int)) # make directories os.makedirs(path, exist_ok=True) From cf5579e7c0dbe51bf99730dfde35a1be75529c24 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Fri, 16 Feb 2024 15:14:43 -0800 Subject: [PATCH 40/48] fix num_chunks --- python/neuroglancer/write_annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index af3fa94d5..c695ec4be 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -525,7 +525,7 @@ def write(self, path: Union[str, pathlib.Path]): (self.upper_bound - self.lower_bound) / self.chunk_size ).astype(int) - np.maximum(num_chunks, np.full(num_chunks.shape, 1, dtype=int)) + num_chunks = np.maximum(num_chunks, np.full(num_chunks.shape, 1, dtype=int)) # make directories os.makedirs(path, exist_ok=True) From 20c40e6e9d187a31c5f8f3065f2bb2253eb6131b Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 17 Feb 2024 17:55:19 -0800 Subject: [PATCH 41/48] fixing spatial indices for points --- python/neuroglancer/write_annotations.py | 26 ++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index c695ec4be..a4e08e01a 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -57,6 +57,7 @@ class Annotation(NamedTuple): id: int encoded: bytes relationships: Sequence[Sequence[int]] + geometry: Sequence[float] _PROPERTY_DTYPES: dict[ @@ -404,14 +405,14 @@ def _add_obj( id = len(self.annotations) annotation = Annotation( - id=id, encoded=encoded.tobytes(), relationships=related_ids + id=id, encoded=encoded.tobytes(), relationships=related_ids, geometry=coords ) - for i in range(int(n_spatial_coords)): - chunk_index = self.get_chunk_index( - np.array(coords[i * self.rank : (i + 1) * self.rank]) - ) - self.annotations_by_chunk[chunk_index].append(annotation) + # for i in range(int(n_spatial_coords)): + # chunk_index = self.get_chunk_index( + # np.array(coords[i * self.rank : (i + 1) * self.rank]) + # ) + # self.annotations_by_chunk[chunk_index].append(annotation) self.annotations.append(annotation) for i, segment_ids in enumerate(related_ids): for segment_id in segment_ids: @@ -492,10 +493,10 @@ def _serialize_annotation_chunk_sharded( } dataset = ts.KvStore.open(spec).result() txn = ts.Transaction() - lower_chunk_index = self.get_chunk_index(self.lower_bound) + # lower_chunk_index = self.get_chunk_index(self.lower_bound) for chunk_index, annotations in annotations_by_chunk.items(): - chunk_index = np.array(chunk_index) - np.array(lower_chunk_index) + # chunk_index = np.array(chunk_index) - np.array(lower_chunk_index) # calculate the compressed morton code for the chunk index key = compressed_morton_code(chunk_index, max_sizes) # convert the np.uint64 to a binary representation of a uint64 @@ -527,12 +528,18 @@ def write(self, path: Union[str, pathlib.Path]): num_chunks = np.maximum(num_chunks, np.full(num_chunks.shape, 1, dtype=int)) + 
metadata["upper_bound"] = self.lower_bound + (num_chunks * self.chunk_size) # make directories os.makedirs(path, exist_ok=True) for relationship in self.relationships: os.makedirs(os.path.join(path, f"rel_{relationship}"), exist_ok=True) os.makedirs(os.path.join(path, "by_id"), exist_ok=True) os.makedirs(os.path.join(path, "spatial0"), exist_ok=True) + for ann in self.annotations: + if self.annotation_type == "point": + # get the first self.rank elements of the geometry array + chunk_index = self.get_chunk_index(ann.geometry - self.lower_bound) + self.annotations_by_chunk[chunk_index].append(ann) total_chunks = len(self.annotations_by_chunk) spatial_sharding_spec = choose_output_spec( @@ -547,6 +554,7 @@ def write(self, path: Union[str, pathlib.Path]): "limit": len(self.annotations), } ] + spatial_sharding_spec = None # write annotations by spatial chunk if spatial_sharding_spec is not None: self._serialize_annotation_chunk_sharded( @@ -557,7 +565,9 @@ def write(self, path: Union[str, pathlib.Path]): ) metadata["spatial"][0]["sharding"] = spatial_sharding_spec.to_json() else: + # lower_chunk_index = self.get_chunk_index(self.lower_bound) for chunk_index, annotations in self.annotations_by_chunk.items(): + # chunk_index = np.array(chunk_index) - np.array(lower_chunk_index) chunk_name = "_".join([str(c) for c in chunk_index]) filepath = os.path.join(path, "spatial0", chunk_name) with open(filepath, "wb") as f: From e6398e89b277291098b4ada7af0c9d03a7bd63f2 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sat, 17 Feb 2024 21:41:03 -0800 Subject: [PATCH 42/48] fixed sharded spatial index writing --- python/neuroglancer/write_annotations.py | 61 +++++++++++++++--------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index a4e08e01a..146727d99 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -21,7 +21,7 @@ from collections import defaultdict from collections.abc import Sequence from typing import Literal, NamedTuple, Optional, SupportsInt, Union, cast - +import math import numpy as np try: @@ -150,29 +150,44 @@ def choose_output_spec( ) -def compressed_morton_code(position: Sequence[int], shape: Sequence[int]): - """Converts a position in a grid to a compressed Morton code. +def compressed_morton_code(gridpt, grid_size): + """Converts a grid point to a compressed morton code. + from cloud-volume""" + if hasattr(gridpt, "__len__") and len(gridpt) == 0: # generators don't have len + return np.zeros((0,), dtype=np.uint32) - Args: - position: A sequence of integers representing the position in the grid. - shape: A sequence of integers representing the shape of the grid. + gridpt = np.asarray(gridpt, dtype=np.uint32) + single_input = False + if gridpt.ndim == 1: + gridpt = np.atleast_2d(gridpt) + single_input = True - Returns: - int: The compressed Morton code. - """ - output_bit = 0 - rank = len(position) - output_num = 0 - for bit in range(32): - for dim in range(rank - 1, -1, -1): - if (shape[dim] - 1) >> bit: - output_num |= ((position[dim] >> bit) & 1) << output_bit - output_bit += 1 - if output_bit == 64: - # In Python, we don't have the 32-bit limitation, so we don't need to split into high and low. - # But you can add code here to handle or signal overflow if needed. 
- pass - return output_num + code = np.zeros((gridpt.shape[0],), dtype=np.uint64) + num_bits = [math.ceil(math.log2(size)) for size in grid_size] + j = np.uint64(0) + one = np.uint64(1) + + if sum(num_bits) > 64: + raise ValueError( + f"Unable to represent grids that require more than 64 bits. Grid size {grid_size} requires {num_bits} bits." + ) + + max_coords = np.max(gridpt, axis=0) + if np.any(max_coords >= grid_size): + raise ValueError( + f"Unable to represent grid points larger than the grid. Grid size: {grid_size} Grid points: {gridpt}" + ) + + for i in range(max(num_bits)): + for dim in range(3): + if 2**i < grid_size[dim]: + bit = ((np.uint64(gridpt[:, dim]) >> np.uint64(i)) & one) << j + code |= bit + j += one + + if single_input: + return code[0] + return code def _get_dtype_for_geometry(annotation_type: AnnotationType, rank: int): @@ -554,7 +569,7 @@ def write(self, path: Union[str, pathlib.Path]): "limit": len(self.annotations), } ] - spatial_sharding_spec = None + # spatial_sharding_spec = None # write annotations by spatial chunk if spatial_sharding_spec is not None: self._serialize_annotation_chunk_sharded( From afbc23df785c203fc3e5c355831a60323e9fc960 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Sun, 18 Feb 2024 08:34:23 -0800 Subject: [PATCH 43/48] generlizing spatial bins with rtree --- python/neuroglancer/write_annotations.py | 37 +++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 146727d99..3fd5dab52 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -20,9 +20,11 @@ import struct from collections import defaultdict from collections.abc import Sequence +from itertools import product from typing import Literal, NamedTuple, Optional, SupportsInt, Union, cast import math import numpy as np +import rtree try: import tensorstore as ts @@ -57,7 +59,6 @@ class Annotation(NamedTuple): id: int encoded: bytes relationships: Sequence[Sequence[int]] - geometry: Sequence[float] _PROPERTY_DTYPES: dict[ @@ -289,6 +290,9 @@ def __init__( shape=(self.rank,), fill_value=float("-inf"), dtype=np.float32 ) self.related_annotations = [{} for _ in self.relationships] + p = rtree.index.Property() + p.dimension = self.rank + self.rtree = rtree.index.Index(properties=p) def get_chunk_index(self, coords): return tuple((coords // self.chunk_size).astype(np.int32)) @@ -420,9 +424,15 @@ def _add_obj( id = len(self.annotations) annotation = Annotation( - id=id, encoded=encoded.tobytes(), relationships=related_ids, geometry=coords + id=id, encoded=encoded.tobytes(), relationships=related_ids ) + spatial_points = coords[: n_spatial_coords * self.rank] + spatial_points = np.reshape(spatial_points, (self.rank, n_spatial_coords)) + lower_bound = np.min(spatial_points, axis=1) + upper_bound = np.max(spatial_points, axis=1) + self.rtree.insert(id, tuple(lower_bound) + tuple(upper_bound), obj=annotation) + # for i in range(int(n_spatial_coords)): # chunk_index = self.get_chunk_index( # np.array(coords[i * self.rank : (i + 1) * self.rank]) @@ -550,11 +560,24 @@ def write(self, path: Union[str, pathlib.Path]): os.makedirs(os.path.join(path, f"rel_{relationship}"), exist_ok=True) os.makedirs(os.path.join(path, "by_id"), exist_ok=True) os.makedirs(os.path.join(path, "spatial0"), exist_ok=True) - for ann in self.annotations: - if self.annotation_type == "point": - # get the first self.rank elements of the geometry array - 
chunk_index = self.get_chunk_index(ann.geometry - self.lower_bound) - self.annotations_by_chunk[chunk_index].append(ann) + + # Generate all combinations of coordinates + coordinates = product(*(range(n) for n in num_chunks)) + + # Iterate over the grid + for cell in coordinates: + # Query the rtree index for annotations in the current chunk + lower_bound = self.lower_bound + np.array(cell) * self.chunk_size + upper_bound = lower_bound + self.chunk_size + coords = np.concatenate((lower_bound, upper_bound)) + chunk_annotations = self.rtree.intersection(tuple(coords), objects="raw") + self.annotations_by_chunk[cell] = list(chunk_annotations) + + # for ann in self.annotations: + # if self.annotation_type == "point": + # # get the first self.rank elements of the geometry array + # chunk_index = self.get_chunk_index(ann.geometry - self.lower_bound) + # self.annotations_by_chunk[chunk_index].append(ann) total_chunks = len(self.annotations_by_chunk) spatial_sharding_spec = choose_output_spec( From a8c7a0f2506430609614faec892e893078e11939 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 19 Feb 2024 11:22:10 -0800 Subject: [PATCH 44/48] improved generalization of upper and lower bound --- python/neuroglancer/write_annotations.py | 127 ++++++++++++----------- 1 file changed, 66 insertions(+), 61 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index 3fd5dab52..f78a08da4 100644 --- a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -14,6 +14,7 @@ import json import logging +import math import numbers import os import pathlib @@ -21,10 +22,10 @@ from collections import defaultdict from collections.abc import Sequence from itertools import product -from typing import Literal, NamedTuple, Optional, SupportsInt, Union, cast -import math +from typing import Literal, NamedTuple, Optional, Union, cast + import numpy as np -import rtree +import rtree # type: ignore try: import tensorstore as ts @@ -307,9 +308,7 @@ def add_point(self, point: Sequence[float], id: Optional[int] = None, **kwargs): f"Expected point to have length {self.coordinate_space.rank}, but received: {len(point)}" ) - self.lower_bound = np.minimum(self.lower_bound, point) - self.upper_bound = np.maximum(self.upper_bound, point) - self._add_obj(point, id, 1, **kwargs) + self._add_obj(point, id, np.array(point), np.array(point), **kwargs) def add_axis_aligned_bounding_box( self, @@ -322,7 +321,11 @@ def add_axis_aligned_bounding_box( raise ValueError( f"Expected annotation type axis_aligned_bounding_box, but received: {self.annotation_type}" ) - self._add_two_point_obj(point_a, point_b, id, 2, **kwargs) + lower_bound = np.minimum(point_a, point_b) + upper_bound = np.maximum(point_a, point_b) + self._add_two_point_obj( + point_a, point_b, lower_bound, upper_bound, id, **kwargs + ) def add_ellipsoid( self, @@ -345,9 +348,9 @@ def add_ellipsoid( f"Expected radii to have length {self.coordinate_space.rank}, but received: {len(radii)}" ) - self.lower_bound = np.minimum(center, self.lower_bound) - self.upper_bound = np.maximum(center, self.upper_bound) - self._add_two_point_obj(center, radii, id, 1, **kwargs) + lower_bound = np.array(center) - np.array(radii) + upper_bound = np.array(center) + np.array(radii) + self._add_two_point_obj(center, radii, lower_bound, upper_bound, id, **kwargs) def add_line( self, @@ -360,14 +363,20 @@ def add_line( raise ValueError( f"Expected annotation type line, but received: {self.annotation_type}" ) - 
self._add_two_point_obj(point_a, point_b, id, 2, **kwargs)
+        lower_bound = np.minimum(point_a, point_b)
+        upper_bound = np.maximum(point_a, point_b)
+
+        self._add_two_point_obj(
+            point_a, point_b, lower_bound, upper_bound, id, **kwargs
+        )
 
     def _add_two_point_obj(
         self,
         point_a: Sequence[float],
         point_b: Sequence[float],
+        lower_bound: np.ndarray,
+        upper_bound: np.ndarray,
         id: Optional[int] = None,
-        n_spatial_coords: Optional[int] = 2,
         **kwargs,
     ):
         if len(point_a) != self.coordinate_space.rank:
@@ -379,28 +388,27 @@ def _add_two_point_obj(
             raise ValueError(
                 f"Expected coordinates to have length {self.coordinate_space.rank}, but received: {len(point_b)}"
             )
-        if n_spatial_coords == 2:
-            min_vals = np.minimum(point_a, point_b)
-            max_vals = np.maximum(point_a, point_b)
-        elif n_spatial_coords == 1:
-            min_vals = np.array(point_a)
-            max_vals = np.array(point_a)
-        else:
-            raise ValueError(f"Unexpected n_spatial_coords {n_spatial_coords}")
-
-        self.lower_bound = np.minimum(self.lower_bound, min_vals)
-        self.upper_bound = np.maximum(self.upper_bound, max_vals)
 
         coords = np.concatenate((point_a, point_b))
-        self._add_obj(cast(Sequence[float], coords), id, n_spatial_coords, **kwargs)
+        self._add_obj(
+            cast(Sequence[float], coords),
+            id,
+            lower_bound=lower_bound,
+            upper_bound=upper_bound,
+            **kwargs,
+        )
 
     def _add_obj(
         self,
         coords: Sequence[float],
         id: Optional[int],
-        n_spatial_coords: SupportsInt = 1,
+        lower_bound: np.ndarray,
+        upper_bound: np.ndarray,
         **kwargs,
     ):
+        self.lower_bound = np.minimum(self.lower_bound, lower_bound)
+        self.upper_bound = np.maximum(self.upper_bound, upper_bound)
+
         encoded = np.zeros(shape=(), dtype=self.dtype)
 
         encoded[()]["geometry"] = coords
@@ -427,17 +435,14 @@ def _add_obj(
             id=id, encoded=encoded.tobytes(), relationships=related_ids
         )
 
-        spatial_points = np.array(coords[: n_spatial_coords * self.rank])
-        spatial_points = np.reshape(
-            spatial_points, [self.rank, cast(SupportsIndex, n_spatial_coords)]
-        )
-        lower_bound = np.min(spatial_points, axis=1)
-        upper_bound = np.max(spatial_points, axis=1)
+        # spatial_points = np.array(coords[: n_spatial_coords * self.rank])
+        # spatial_points = np.reshape(
+        #     spatial_points, [self.rank, cast(SupportsIndex, n_spatial_coords)]
+        # )
+        # lower_bound = np.min(spatial_points, axis=1)
+        # upper_bound = np.max(spatial_points, axis=1)
         self.rtree.insert(id, tuple(lower_bound) + tuple(upper_bound), obj=annotation)
-
         self.annotations.append(annotation)
         for i, segment_ids in enumerate(related_ids):
             for segment_id in segment_ids:
@@ -508,9 +513,7 @@ def _serialize_annotations_by_related_id(self, path, related_id_dict, shard_spec
             dataset.with_transaction(txn)[key] = value
         txn.commit_async().result()
 
-    def _serialize_annotation_chunk_sharded(
-        self, path, annotations_by_chunk, shard_spec, max_sizes
-    ):
+    def _serialize_annotation_chunk_sharded(self, path, shard_spec, max_sizes):
         spec = {
             "driver": "neuroglancer_uint64_sharded",
             "metadata": shard_spec.to_json(),
@@ -518,16 +521,22 @@ def _serialize_annotation_chunk_sharded(self, path, shard_spec, max_sizes):
         }
         dataset = ts.KvStore.open(spec).result()
         txn = ts.Transaction()
-        # lower_chunk_index = self.get_chunk_index(self.lower_bound)
 
-        for chunk_index, annotations in annotations_by_chunk.items():
-            # chunk_index = np.array(chunk_index) - np.array(lower_chunk_index)
-            # calculate the compressed morton code for the chunk index
-            key =
compressed_morton_code(chunk_index, max_sizes) + # Generate all combinations of coordinates + coordinates = product(*(range(n) for n in max_sizes)) + + # Iterate over the grid + for cell in coordinates: + # Query the rtree index for annotations in the current chunk + lower_bound = self.lower_bound + np.array(cell) * self.chunk_size + upper_bound = lower_bound + self.chunk_size + coords = np.concatenate((lower_bound, upper_bound)) + chunk_annotations = self.rtree.intersection(tuple(coords), objects="raw") + key = compressed_morton_code(cell, max_sizes) # convert the np.uint64 to a binary representation of a uint64 # using big endian representation key = np.ascontiguousarray(key, dtype=">u8").tobytes() - value = self._encode_multiple_annotations(annotations) + value = self._encode_multiple_annotations(chunk_annotations) dataset.with_transaction(txn)[key] = value txn.commit_async().result() @@ -561,18 +570,6 @@ def write(self, path: Union[str, pathlib.Path]): os.makedirs(os.path.join(path, "by_id"), exist_ok=True) os.makedirs(os.path.join(path, "spatial0"), exist_ok=True) - # Generate all combinations of coordinates - coordinates = product(*(range(n) for n in num_chunks)) - - # Iterate over the grid - for cell in coordinates: - # Query the rtree index for annotations in the current chunk - lower_bound = self.lower_bound + np.array(cell) * self.chunk_size - upper_bound = lower_bound + self.chunk_size - coords = np.concatenate((lower_bound, upper_bound)) - chunk_annotations = self.rtree.intersection(tuple(coords), objects="raw") - self.annotations_by_chunk[cell] = list(chunk_annotations) - # for ann in self.annotations: # if self.annotation_type == "point": # # get the first self.rank elements of the geometry array @@ -597,19 +594,27 @@ def write(self, path: Union[str, pathlib.Path]): if spatial_sharding_spec is not None: self._serialize_annotation_chunk_sharded( os.path.join(path, "spatial0"), - self.annotations_by_chunk, spatial_sharding_spec, num_chunks.tolist(), ) metadata["spatial"][0]["sharding"] = spatial_sharding_spec.to_json() else: - # lower_chunk_index = self.get_chunk_index(self.lower_bound) - for chunk_index, annotations in self.annotations_by_chunk.items(): - # chunk_index = np.array(chunk_index) - np.array(lower_chunk_index) - chunk_name = "_".join([str(c) for c in chunk_index]) + # Generate all combinations of coordinates + coordinates = product(*(range(n) for n in num_chunks)) + + # Iterate over the grid + for cell in coordinates: + # Query the rtree index for annotations in the current chunk + lower_bound = self.lower_bound + np.array(cell) * self.chunk_size + upper_bound = lower_bound + self.chunk_size + coords = np.concatenate((lower_bound, upper_bound)) + chunk_annotations = self.rtree.intersection( + tuple(coords), objects="raw" + ) + chunk_name = "_".join([str(c) for c in cell]) filepath = os.path.join(path, "spatial0", chunk_name) with open(filepath, "wb") as f: - self._serialize_annotations(f, annotations) + self._serialize_annotations(f, chunk_annotations) # write annotations by id if sharding_spec is not None: From 2a5eeea5eba376ac75fcc2b319eed1e3b83011c8 Mon Sep 17 00:00:00 2001 From: Forrest Collman Date: Mon, 19 Feb 2024 11:29:02 -0800 Subject: [PATCH 45/48] remove comments, add sharded option, fix generator --- python/neuroglancer/write_annotations.py | 30 +++++++----------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py index f78a08da4..87ec7be3a 100644 --- 
a/python/neuroglancer/write_annotations.py +++ b/python/neuroglancer/write_annotations.py @@ -435,12 +435,6 @@ def _add_obj( id=id, encoded=encoded.tobytes(), relationships=related_ids ) - # spatial_points = np.array(coords[: n_spatial_coords * self.rank]) - # spatial_points = np.reshape( - # spatial_points, [self.rank, cast(SupportsIndex, n_spatial_coords)] - # ) - # lower_bound = np.min(spatial_points, axis=1) - # upper_bound = np.max(spatial_points, axis=1) self.rtree.insert(id, tuple(lower_bound) + tuple(upper_bound), obj=annotation) self.annotations.append(annotation) @@ -460,7 +454,6 @@ def _serialize_annotations_sharded(self, path, annotations, shard_spec): txn = ts.Transaction() for ann in annotations: # convert the ann.id to a binary representation of a uint64 - # key = ann.id.to_bytes(8, "little") key = np.ascontiguousarray(ann.id, dtype=">u8").tobytes() value = ann.encoded for related_ids in ann.relationships: @@ -531,7 +524,9 @@ def _serialize_annotation_chunk_sharded(self, path, shard_spec, max_sizes): lower_bound = self.lower_bound + np.array(cell) * self.chunk_size upper_bound = lower_bound + self.chunk_size coords = np.concatenate((lower_bound, upper_bound)) - chunk_annotations = self.rtree.intersection(tuple(coords), objects="raw") + chunk_annotations = list( + self.rtree.intersection(tuple(coords), objects="raw") + ) key = compressed_morton_code(cell, max_sizes) # convert the np.uint64 to a binary representation of a uint64 # using big endian representation @@ -541,7 +536,7 @@ def _serialize_annotation_chunk_sharded(self, path, shard_spec, max_sizes): txn.commit_async().result() - def write(self, path: Union[str, pathlib.Path]): + def write(self, path: Union[str, pathlib.Path], write_sharded: bool = False): metadata = { "@type": "neuroglancer_annotations_v1", "dimensions": self.coordinate_space.to_json(), @@ -570,12 +565,6 @@ def write(self, path: Union[str, pathlib.Path]): os.makedirs(os.path.join(path, "by_id"), exist_ok=True) os.makedirs(os.path.join(path, "spatial0"), exist_ok=True) - # for ann in self.annotations: - # if self.annotation_type == "point": - # # get the first self.rank elements of the geometry array - # chunk_index = self.get_chunk_index(ann.geometry - self.lower_bound) - # self.annotations_by_chunk[chunk_index].append(ann) - total_chunks = len(self.annotations_by_chunk) spatial_sharding_spec = choose_output_spec( total_chunks, total_ann_bytes + 8 * len(self.annotations) + 8 * total_chunks @@ -589,9 +578,8 @@ def write(self, path: Union[str, pathlib.Path]): "limit": len(self.annotations), } ] - # spatial_sharding_spec = None # write annotations by spatial chunk - if spatial_sharding_spec is not None: + if (spatial_sharding_spec is not None) and write_sharded: self._serialize_annotation_chunk_sharded( os.path.join(path, "spatial0"), spatial_sharding_spec, @@ -608,8 +596,8 @@ def write(self, path: Union[str, pathlib.Path]): lower_bound = self.lower_bound + np.array(cell) * self.chunk_size upper_bound = lower_bound + self.chunk_size coords = np.concatenate((lower_bound, upper_bound)) - chunk_annotations = self.rtree.intersection( - tuple(coords), objects="raw" + chunk_annotations = list( + self.rtree.intersection(tuple(coords), objects="raw") ) chunk_name = "_".join([str(c) for c in cell]) filepath = os.path.join(path, "spatial0", chunk_name) @@ -617,7 +605,7 @@ def write(self, path: Union[str, pathlib.Path]): self._serialize_annotations(f, chunk_annotations) # write annotations by id - if sharding_spec is not None: + if (sharding_spec is not None) and 
From 1d502eeb3280b2c39283f1e111b54603ffcef0fb Mon Sep 17 00:00:00 2001
From: Forrest Collman
Date: Mon, 19 Feb 2024 11:32:42 -0800
Subject: [PATCH 46/48] adding rtree dependency

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 810021605..7bf48a2a5 100755
--- a/setup.py
+++ b/setup.py
@@ -321,6 +321,7 @@ def _no_guess_dev_version(version):
         "google-apitools",
         "google-auth",
         "atomicwrites",
+        "rtree",
     ],
     extras_require={
         "test": [

From a781732efcc5d5aa4a53d8e310e280e3827fdd7c Mon Sep 17 00:00:00 2001
From: Forrest Collman
Date: Mon, 19 Feb 2024 17:03:10 -0800
Subject: [PATCH 47/48] switch to sharded writing as default

---
 python/neuroglancer/write_annotations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py
index 87ec7be3a..4925641ee 100644
--- a/python/neuroglancer/write_annotations.py
+++ b/python/neuroglancer/write_annotations.py
@@ -536,7 +536,7 @@ def _serialize_annotation_chunk_sharded(self, path, shard_spec, max_sizes):

         txn.commit_async().result()

-    def write(self, path: Union[str, pathlib.Path], write_sharded: bool = False):
+    def write(self, path: Union[str, pathlib.Path], write_sharded: bool = True):
         metadata = {
             "@type": "neuroglancer_annotations_v1",
             "dimensions": self.coordinate_space.to_json(),
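With patch 47 flipping the default, callers now get the sharded layout unless they opt out. A hypothetical end-to-end sketch of the API after this series; the `AnnotationWriter` constructor arguments changed several times across these patches, so treat the exact signature here as illustrative rather than authoritative:

```python
import neuroglancer
from neuroglancer.write_annotations import AnnotationWriter

coordinate_space = neuroglancer.CoordinateSpace(
    names=["x", "y", "z"], units="nm", scales=[1, 1, 1]
)
# illustrative constructor call; see the diffs for the evolving signature
writer = AnnotationWriter(coordinate_space, annotation_type="point")
writer.add_point([10, 20, 30])
writer.add_point([100, 200, 300])

# sharded by_id/spatial0 output is now the default (patch 47);
# pass write_sharded=False to get one file per id and per spatial chunk
writer.write("my_annotations")
writer.write("my_annotations_unsharded", write_sharded=False)
```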
From 066b8013cd786e4341d3717653f0a8bf72ca0c59 Mon Sep 17 00:00:00 2001
From: Forrest Collman
Date: Wed, 21 Feb 2024 16:42:58 -0800
Subject: [PATCH 48/48] removing chunks with no items

---
 python/neuroglancer/write_annotations.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/python/neuroglancer/write_annotations.py b/python/neuroglancer/write_annotations.py
index 4925641ee..60ece4601 100644
--- a/python/neuroglancer/write_annotations.py
+++ b/python/neuroglancer/write_annotations.py
@@ -527,12 +527,13 @@ def _serialize_annotation_chunk_sharded(self, path, shard_spec, max_sizes):
             chunk_annotations = list(
                 self.rtree.intersection(tuple(coords), objects="raw")
             )
-            key = compressed_morton_code(cell, max_sizes)
-            # convert the np.uint64 to a binary representation of a uint64
-            # using big endian representation
-            key = np.ascontiguousarray(key, dtype=">u8").tobytes()
-            value = self._encode_multiple_annotations(chunk_annotations)
-            dataset.with_transaction(txn)[key] = value
+            if len(chunk_annotations) > 0:
+                key = compressed_morton_code(cell, max_sizes)
+                # convert the np.uint64 to a binary representation of a uint64
+                # using big endian representation
+                key = np.ascontiguousarray(key, dtype=">u8").tobytes()
+                value = self._encode_multiple_annotations(chunk_annotations)
+                dataset.with_transaction(txn)[key] = value

         txn.commit_async().result()

@@ -578,6 +579,7 @@ def write(self, path: Union[str, pathlib.Path], write_sharded: bool = True):
                 "limit": len(self.annotations),
             }
         ]
+        spatial_sharding_spec = None
         # write annotations by spatial chunk
         if (spatial_sharding_spec is not None) and write_sharded:
             self._serialize_annotation_chunk_sharded(
@@ -599,10 +601,11 @@ def write(self, path: Union[str, pathlib.Path], write_sharded: bool = True):
                 chunk_annotations = list(
                     self.rtree.intersection(tuple(coords), objects="raw")
                 )
-                chunk_name = "_".join([str(c) for c in cell])
-                filepath = os.path.join(path, "spatial0", chunk_name)
-                with open(filepath, "wb") as f:
-                    self._serialize_annotations(f, chunk_annotations)
+                if len(chunk_annotations) > 0:
+                    chunk_name = "_".join([str(c) for c in cell])
+                    filepath = os.path.join(path, "spatial0", chunk_name)
+                    with open(filepath, "wb") as f:
+                        self._serialize_annotations(f, chunk_annotations)

         # write annotations by id
         if (sharding_spec is not None) and write_sharded:
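Two things are worth noting in patch 48: empty grid cells now produce neither a shard entry nor a `spatial0` file, and the second hunk also adds a live `spatial_sharding_spec = None` assignment just before the branch, which as committed forces the unsharded spatial path regardless of `write_sharded`. That unsharded path reduces to roughly the following sketch, where `serialize` stands in for the writer's `_serialize_annotations` and the argument names are illustrative:

```python
import os
from itertools import product

import numpy as np

def write_spatial_chunks(path, rtree, lower_bound, chunk_size, num_chunks, serialize):
    # one file per non-empty grid cell, named "i_j_k" from the cell index
    for cell in product(*(range(n) for n in num_chunks)):
        lo = lower_bound + np.array(cell) * chunk_size
        hi = lo + chunk_size
        hits = list(rtree.intersection(tuple(np.concatenate((lo, hi))), objects="raw"))
        if len(hits) > 0:  # patch 48: skip chunks with no annotations
            chunk_name = "_".join(str(c) for c in cell)
            with open(os.path.join(path, "spatial0", chunk_name), "wb") as f:
                serialize(f, hits)
```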