Skip to content

Commit

Permalink
[python] Add VFS binding in pybind11 (#2882)
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv authored Oct 15, 2024
1 parent 70f66cf commit 6395e9b
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 9 deletions.
1 change: 1 addition & 0 deletions apis/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def run(self):
"src/tiledbsoma/common.cc",
"src/tiledbsoma/reindexer.cc",
"src/tiledbsoma/query_condition.cc",
"src/tiledbsoma/soma_vfs.cc",
"src/tiledbsoma/soma_context.cc",
"src/tiledbsoma/soma_array.cc",
"src/tiledbsoma/soma_object.cc",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ def from_isolated_h5ad(
experiment, not in append mode, but allowing us to still have the bulk of the ingestor code
to be non-duplicated between non-append mode and append mode.
"""
tiledb_ctx = None if context is None else context.tiledb_ctx
with read_h5ad(h5ad_file_name, mode="r", ctx=tiledb_ctx) as adata:
with read_h5ad(h5ad_file_name, mode="r", ctx=context) as adata:
return cls.from_isolated_anndata(
adata,
measurement_name=measurement_name,
Expand Down Expand Up @@ -434,8 +433,7 @@ def from_h5ad_append_on_experiment(
"""Extends registration data to one more H5AD input file."""
tiledbsoma.logging.logger.info(f"Registration: registering {h5ad_file_name}.")

tiledb_ctx = None if context is None else context.tiledb_ctx
with read_h5ad(h5ad_file_name, mode="r", ctx=tiledb_ctx) as adata:
with read_h5ad(h5ad_file_name, mode="r", ctx=context) as adata:
return cls.from_anndata_append_on_experiment(
adata,
previous,
Expand Down
10 changes: 6 additions & 4 deletions apis/python/src/tiledbsoma/io/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
import pyarrow as pa
from anndata._core import file_backing

import tiledb

from .. import pytiledbsoma as clib
from .._exception import SOMAError
from .._types import Path
from ..options import SOMATileDBContext

_pa_type_to_str_fmt = {
pa.string(): "U",
Expand All @@ -42,12 +42,14 @@

@contextmanager
def read_h5ad(
input_path: Path, *, mode: str = "r", ctx: Optional[tiledb.Ctx] = None
input_path: Path, *, mode: str = "r", ctx: Optional[SOMATileDBContext] = None
) -> Iterator[ad.AnnData]:
"""
This lets us ingest H5AD with "r" (backed mode) from S3 URIs.
"""
input_handle = tiledb.VFS(ctx=ctx).open(input_path)
ctx = ctx or SOMATileDBContext()
vfs = clib.SOMAVFS(ctx.native_context)
input_handle = clib.SOMAVFSFilebuf(vfs).open(str(input_path))
try:
with _hack_patch_anndata():
anndata = ad.read_h5ad(_FSPathWrapper(input_handle, input_path), mode)
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def from_h5ad(

logging.log_io(None, f"START READING {input_path}")

with read_h5ad(input_path, mode="r", ctx=context.tiledb_ctx) as anndata:
with read_h5ad(input_path, mode="r", ctx=context) as anndata:
logging.log_io(None, _util.format_elapsed(s, f"FINISH READING {input_path}"))

uri = from_anndata(
Expand Down
2 changes: 2 additions & 0 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ void load_soma_group(py::module&);
void load_soma_collection(py::module&);
void load_query_condition(py::module&);
void load_reindexer(py::module&);
void load_soma_vfs(py::module&);

PYBIND11_MODULE(pytiledbsoma, m) {
py::register_exception<TileDBSOMAError>(m, "SOMAError");
Expand Down Expand Up @@ -150,6 +151,7 @@ PYBIND11_MODULE(pytiledbsoma, m) {
load_soma_collection(m);
load_query_condition(m);
load_reindexer(m);
load_soma_vfs(m);
}

}; // namespace libtiledbsomacpp
66 changes: 66 additions & 0 deletions apis/python/src/tiledbsoma/soma_vfs.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/**
* @file soma_vfs.cc
*
* @section LICENSE
*
* The MIT License
*
* @copyright Copyright (c) 2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* @section DESCRIPTION
*
* This file defines the VFS bindings.
*/

#include "common.h"

namespace libtiledbsomacpp {

namespace py = pybind11;
using namespace py::literals;
using namespace tiledbsoma;
using VFSFilebuf = tiledb::impl::VFSFilebuf;

// TODO This temporary workaround prevents namespace clash with tiledb-py.
// Bind tiledb::VFS directly once tiledb-py dependency is removed
class SOMAVFS : public tiledb::VFS {
public:
using tiledb::VFS::VFS;
};

/**
 * Register the VFS helper classes on the given Python module.
 *
 * Two classes are exposed:
 *  - `SOMAVFS(ctx)`: a VFS built from the TileDB context held by a
 *    SOMAContext. `ctx` must not be None.
 *  - `SOMAVFSFilebuf(vfs)`: a file buffer on top of a `SOMAVFS`, with
 *    `open(uri)` (read mode only) and `close(should_throw=True)`.
 *
 * @param m Module the classes are added to.
 */
void load_soma_vfs(py::module& m) {
    py::class_<SOMAVFS>(m, "SOMAVFS")
        .def(
            py::init([](std::shared_ptr<SOMAContext> context) {
                return SOMAVFS(*context->tiledb_ctx());
            }),
            // Reject None up front: pybind11 would otherwise pass an empty
            // shared_ptr, which the factory above dereferences.
            "ctx"_a.none(false));

    py::class_<VFSFilebuf>(m, "SOMAVFSFilebuf")
        // The file buffer refers to the VFS it was constructed from rather
        // than owning it, so keep the Python SOMAVFS object (argument 2)
        // alive for as long as the buffer (argument 1) exists.
        .def(py::init<const SOMAVFS&>(), py::keep_alive<1, 2>())
        .def(
            "open",
            [](VFSFilebuf& buf, const std::string& uri) {
                // VFSFilebuf::open signals failure by returning nullptr,
                // which pybind11 would surface to Python as None. Raise
                // instead so callers never receive an unusable handle.
                VFSFilebuf* opened = buf.open(uri, std::ios::in);
                if (opened == nullptr) {
                    throw TileDBSOMAError(
                        "[SOMAVFSFilebuf] unable to open URI '" + uri +
                        "' for reading");
                }
                // `opened` is `&buf`, which is already registered with
                // pybind11, so Python gets back the same object.
                return opened;
            })
        .def("close", &VFSFilebuf::close, "should_throw"_a = true);
}
} // namespace libtiledbsomacpp

0 comments on commit 6395e9b

Please sign in to comment.