Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose chunk requests to python WIP #173

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions check_chunk_requests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import numpy as np
import z5py

# Small demo: ask a dataset which chunks are touched by a slicing request.
p = './data.n5'
f = z5py.File(p, 'a')

data = np.arange(100*100).reshape((100, 100))
ds = f.require_dataset('test', shape=data.shape, dtype=data.dtype, chunks=(10, 10))

# only the ids of the chunks in the request
chunk_list = ds.get_chunks_in_request(np.s_[5:15, 3:95])
print(len(chunk_list))

# ids of the chunks plus, for each chunk, the slices belonging to the request
chunk_list, chunk_bbs = ds.get_chunks_in_request(np.s_[5:15, 3:95], True)

for chunk_id, chunk_bb in zip(chunk_list, chunk_bbs):
    print(chunk_id)
    print(chunk_bb)
    print()
1 change: 0 additions & 1 deletion conda-recipe/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ cmake .. \
-DCMAKE_C_COMPILER=${CC} \
-DCMAKE_CXX_COMPILER=${CXX} \
-DCMAKE_BUILD_TYPE=RELEASE \
-DCMAKE_OSX_DEPLOYMENT_TARGET=10.9\
-DCMAKE_INSTALL_PREFIX=${PREFIX} \
-DCMAKE_PREFIX_PATH=${PREFIX} \
\
Expand Down
4 changes: 2 additions & 2 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ requirements:
- python {{PY_VER}}
- cmake
- boost-cpp >=1.63
- xtensor 0.19.3
- xtensor-python 0.22.1
- xtensor
- xtensor-python
- numpy >=1.15
- nlohmann_json
- blosc
Expand Down
1 change: 1 addition & 0 deletions include/z5/multiarray/xtensor_access.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ namespace multiarray {

// check if this chunk exists, if not fill output with fill value
if(!ds.chunkExists(chunkId)) {
// std::cout << "Chunk does not exist" << std::endl;
view = fillValue;;
continue;
}
Expand Down
70 changes: 70 additions & 0 deletions matthews_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# TODO remove before merging into master

import z5py
import numpy as np
import os
from shutil import copyfile, rmtree

# Round-trip check for `get_chunks_in_request`: copy only the chunks reported
# for a request into a fresh container and verify that reading the same
# request from that container reproduces the source data.
filename = 'test_chunk_requests' + '.n5'
f = z5py.File(filename)

test_shape = (30, 10, 5, 20, 3, 32)
# test_shape = (30, 10, 5)
test_data = np.random.rand(*test_shape)
chunk_shape = (10, 1, 1, 12, 1, 6)
# chunk_shape = (10, 1, 1)


# remove dataset if it already exists before creating it
if os.path.isdir(filename + '/dim_test'):
    rmtree(filename + '/dim_test')

print(filename)
dset = f.create_dataset('dim_test',
                        shape=test_shape,
                        chunks=chunk_shape,
                        dtype=test_data.dtype)
dset[:] = test_data

# begin / end pairs of the request, one pair per axis
read = [2, 8, 3, 4, 0, 1, 16, 19, 2, 3, 11, 19]
# build the slicing once; it is the same tuple of slices that
# np.s_[read[0]:read[1], ..., read[10]:read[11]] produces
request = tuple(slice(read[i], read[i + 1]) for i in range(0, len(read), 2))

subset_test = test_data[request]

chunk_ids = dset.get_chunks_in_request(request)

compare_file = filename + '_compare' + '.n5'
# use rmtree (as for the dataset above) instead of shelling out to `rm -r`
if os.path.isdir(compare_file):
    rmtree(compare_file)

# create a new empty data container
f = z5py.File(compare_file)
dset = f.create_dataset('dim_test',
                        shape=test_shape,
                        chunks=chunk_shape,
                        dtype=test_data.dtype)

# copy just those chunks into container
for ids in chunk_ids:
    ids_str = '/'.join(map(str, ids))
    # make the chunk dir if it's not there
    os.makedirs(os.path.dirname(compare_file + '/dim_test/' + ids_str), exist_ok=True)
    copyfile(filename + '/dim_test/' + ids_str, compare_file + '/dim_test/' + ids_str)

# perform the subset chunk read
compare_data = dset[request]

if not np.all(subset_test == compare_data):
    raise ValueError('The chunk id list did not produce an array that matched the source')
57 changes: 57 additions & 0 deletions src/python/lib/dataset.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,63 @@ namespace z5 {
return filesystem::openDataset(handle);
}
))

.def("chunks_in_request", [](const Dataset & ds,
                             const std::vector<size_t> & roiBegin,
                             const std::vector<size_t> & roiShape){
    // Returns the ids of all chunks that overlap the roi given by
    // roiBegin / roiShape, one id vector per chunk.
    std::vector<std::vector<size_t>> overlappingIds;
    {
        // The GIL is released for the lookup; inside this scope only the
        // id container local to this lambda is modified.
        py::gil_scoped_release lift_gil;
        ds.chunking().getBlocksOverlappingRoi(roiBegin, roiShape, overlappingIds);

        // ids are reversed for n5 (i.e. non-zarr) datasets
        const bool isN5 = !ds.isZarr();
        if(isN5) {
            for(auto idIt = overlappingIds.begin(); idIt != overlappingIds.end(); ++idIt) {
                std::reverse(idIt->begin(), idIt->end());
            }
        }
    }
    return overlappingIds;
})


.def("chunks_and_slices_in_request", [](const Dataset & ds,
                                        const std::vector<size_t> & roiBegin,
                                        const std::vector<size_t> & roiShape){
    // Returns a tuple (chunkIds, chunkBegins, chunkShapes) for all chunks
    // overlapping the roi given by roiBegin / roiShape:
    //  - chunkIds:    ids of the chunks (reversed for n5 datasets)
    //  - chunkBegins: the `offsetInChunk` reported by getCoordinatesInRoi
    //  - chunkShapes: the `requestShape` reported by getCoordinatesInRoi
    // All three vectors have one entry per chunk, in the same order.
    std::vector<std::vector<size_t>> chunkIds;
    std::vector<std::vector<size_t>> chunkBegins;
    std::vector<std::vector<size_t>> chunkShapes;
    {
        // The GIL is released here; only containers local to this lambda
        // are modified inside this scope.
        py::gil_scoped_release lift_gil;
        const auto & chunking = ds.chunking();
        chunking.getBlocksOverlappingRoi(roiBegin, roiShape, chunkIds);

        chunkBegins.reserve(chunkIds.size());
        chunkShapes.reserve(chunkIds.size());

        const bool reverseIds = !ds.isZarr();
        types::ShapeType offsetInRequest, requestShape, offsetInChunk;
        for(auto & chunkId : chunkIds) {
            // The coordinates must be computed from the id exactly as it was
            // returned by getBlocksOverlappingRoi, i.e. BEFORE it is reversed
            // for n5; passing the reversed id would look up the wrong chunk.
            chunking.getCoordinatesInRoi(chunkId,
                                         roiBegin,
                                         roiShape,
                                         offsetInRequest,
                                         requestShape,
                                         offsetInChunk);
            chunkBegins.push_back(offsetInChunk);
            chunkShapes.push_back(requestShape);

            // need to reverse the chunk ids for n5 datasets
            if(reverseIds) {
                std::reverse(chunkId.begin(), chunkId.end());
            }
        }
    }
    return std::make_tuple(chunkIds, chunkBegins, chunkShapes);
})
;

// export I/O for all dtypes
Expand Down
21 changes: 21 additions & 0 deletions src/python/module/z5py/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,3 +514,24 @@ def get_chunk_shape(self, chunk_indices):
tuple - shape of the chunk
"""
return self._impl.getChunkShape(chunk_indices)

# TODO name??
# WIP implement
def get_chunks_in_request(self, index, return_chunk_slices=False):
    """ Get the chunks that are part of the request given by `index`.

    Args:
        index: the request; it is converted to a roi (begin and shape)
            with `index_to_roi`.
        return_chunk_slices (bool): whether to also return one tuple of
            slices per chunk, built from the begin and shape the
            implementation reports for that chunk (default: False).

    Returns:
        list - ids of the chunks in the request.
        If `return_chunk_slices` is True, a tuple of this list and the
        list of slice tuples (same length and order) is returned instead.
        For an empty request the list(s) are empty.
    """
    roi_begin, shape, _ = self.index_to_roi(index)

    # empty request
    if 0 in shape:
        # the parentheses matter: without them the conditional only applies
        # to the second element and a 2-tuple would always be returned
        return ([], []) if return_chunk_slices else []

    if return_chunk_slices:
        (chunk_list,
         chunk_bb_starts,
         chunk_bb_shapes) = self._impl.chunks_and_slices_in_request(roi_begin, shape)
        # turn each (begin, shape) pair into a tuple of slices, one slice per axis
        chunk_slices = [tuple(slice(start, start + sh) for start, sh in zip(bb_start, bb_shape))
                        for bb_start, bb_shape in zip(chunk_bb_starts, chunk_bb_shapes)]
        assert len(chunk_list) == len(chunk_slices)
        return chunk_list, chunk_slices
    else:
        chunk_list = self._impl.chunks_in_request(roi_begin, shape)
        return chunk_list