From 3d6f16009723ca408c8f8a40d989285480511b7b Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Wed, 21 Apr 2021 17:15:08 +0200 Subject: [PATCH] Move to minimalkv --- CHANGES.rst | 4 ++++ asv_bench/benchmarks/index.py | 4 ++-- asv_bench/benchmarks/predicate_pushdown.py | 2 +- asv_bench/benchmarks/write.py | 2 +- conda-requirements.txt | 3 +-- docs/conf.py | 4 +--- docs/environment-docs.yml | 3 +-- docs/guide/cube/command_line_features.rst | 4 ++-- docs/guide/cube/examples.rst | 8 ++++---- docs/guide/cube/glossary.rst | 6 +++--- docs/guide/getting_started.rst | 20 ++++++++----------- docs/guide/mutating_datasets.rst | 6 +++--- docs/guide/partitioning.rst | 2 +- docs/spec/store_interface.rst | 9 ++++----- kartothek/cli/__init__.py | 4 ++-- kartothek/cli/_utils.py | 12 +++++------ kartothek/core/common_metadata.py | 2 +- kartothek/core/docs.py | 12 +++++------ kartothek/core/factory.py | 4 ++-- kartothek/core/typing.py | 2 +- kartothek/core/utils.py | 13 ++++++------ kartothek/io/dask/bag_cube.py | 4 ++-- kartothek/io/dask/common_cube.py | 4 ++-- kartothek/io/dask/dataframe_cube.py | 4 ++-- kartothek/io/eager.py | 14 ++++++------- kartothek/io/eager_cube.py | 18 ++++++++--------- kartothek/io/iter.py | 4 ++-- kartothek/io/testing/read.py | 2 +- kartothek/io/testing/write.py | 2 +- kartothek/io_components/cube/cleanup.py | 2 +- kartothek/io_components/cube/common.py | 6 +++--- kartothek/io_components/cube/copy.py | 4 ++-- .../io_components/cube/query/__init__.py | 4 ++-- kartothek/io_components/cube/query/_group.py | 8 ++++---- kartothek/io_components/cube/remove.py | 2 +- kartothek/io_components/cube/stats.py | 2 +- kartothek/io_components/cube/write.py | 4 ++-- kartothek/io_components/metapartition.py | 2 +- kartothek/io_components/write.py | 2 +- kartothek/serialization/_csv.py | 2 +- kartothek/serialization/_generic.py | 6 +++--- kartothek/serialization/_parquet.py | 4 ++-- kartothek/utils/ktk_adapters.py | 6 +++--- kartothek/utils/store.py | 8 ++++---- .../arrow-compat/generate_reference.py | 2 +- requirements.txt | 3 +-- tests/api/test_discover.py | 2 +- tests/cli/conftest.py | 14 ++++++------- tests/conftest.py | 4 ++-- tests/core/test_dataset_dyn_part.py | 4 ++-- tests/core/test_utils.py | 5 ++--- .../test_dataset_metadata_factory.py | 2 +- tests/io_components/test_utils.py | 6 +++--- tests/serialization/test_arrow_compat.py | 2 +- tests/serialization/test_parquet.py | 4 ++-- tests/utils/test_store.py | 8 ++++---- 56 files changed, 144 insertions(+), 152 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index abb84d1c..3832baa1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,10 @@ Changelog ========= +Kartothek 4.1.0 (2021-04-xx) +============================ + +* Switch from ``simplekv`` and ``storefact`` to their successor ``minimalkv`` as the library providing the store implementations. 
Kartothek 4.0.2 (2021-04-xx) ============================ diff --git a/asv_bench/benchmarks/index.py b/asv_bench/benchmarks/index.py index 4b6bd52e..864030b1 100644 --- a/asv_bench/benchmarks/index.py +++ b/asv_bench/benchmarks/index.py @@ -9,10 +9,10 @@ import uuid from functools import lru_cache +import minimalkv import numpy as np import pandas as pd import pyarrow as pa -import storefact from kartothek.core.index import ExplicitSecondaryIndex from kartothek.io_components.metapartition import MetaPartition @@ -44,7 +44,7 @@ def setup(self, number_values, number_partitions, dtype): column=self.column_name, index_dct=index_dct, dtype=arrow_type ) self.tmp_dir = tempfile.mkdtemp() - self.store = storefact.get_store_from_url("hfs://{}".format(self.tmp_dir)) + self.store = minimalkv.get_store_from_url("hfs://{}".format(self.tmp_dir)) self.dataset_uuid = "some_uuid" self.storage_key = self.ktk_index.store(self.store, self.dataset_uuid) diff --git a/asv_bench/benchmarks/predicate_pushdown.py b/asv_bench/benchmarks/predicate_pushdown.py index ee876b2e..b0ab43f9 100644 --- a/asv_bench/benchmarks/predicate_pushdown.py +++ b/asv_bench/benchmarks/predicate_pushdown.py @@ -1,4 +1,4 @@ -from storefact import get_store_from_url +from minimalkv import get_store_from_url from kartothek.serialization import ParquetSerializer from kartothek.serialization.testing import get_dataframe_not_nested diff --git a/asv_bench/benchmarks/write.py b/asv_bench/benchmarks/write.py index 50f0c116..47a069c2 100644 --- a/asv_bench/benchmarks/write.py +++ b/asv_bench/benchmarks/write.py @@ -4,7 +4,7 @@ import tempfile import uuid -from storefact import get_store_from_url +from minimalkv import get_store_from_url from kartothek.core.common_metadata import make_meta from kartothek.core.testing import get_dataframe_alltypes diff --git a/conda-requirements.txt b/conda-requirements.txt index 8bcaa631..8b19a533 100644 --- a/conda-requirements.txt +++ b/conda-requirements.txt @@ -1,13 +1,12 @@ dask[dataframe] decorator +minimalkv msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash numpy!=1.15.0,!=1.16.0 pandas>=0.23.0, !=1.0.0 pyarrow>=0.17.1,!=1.0.0, <4 simplejson -simplekv -storefact toolz typing_extensions # Some backports of the py3.8 typing module urlquote>=1.1.3 diff --git a/docs/conf.py b/docs/conf.py index e133a635..ea9c2ec8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -105,13 +105,11 @@ intersphinx_mapping = { "pandas": ("https://pandas.pydata.org/docs/", None), - "simplekv": ("https://simplekv.readthedocs.io/en/stable/", None), + "minimalkv": ("https://minimalkv.readthedocs.io/en/stable/", None), "pyarrow": ("https://arrow.apache.org/docs/", None), "numpy": ("https://numpy.org/doc/stable/", None), "python": ("https://docs.python.org/3", None), "dask": ("https://docs.dask.org/en/stable/", None), - # Storefact isn't exposing any sphinx refs - # "storefact": ("https://storefact.readthedocs.io/en/stable", None), } # In particular type annotations are rendered as its full path to the class but diff --git a/docs/environment-docs.yml b/docs/environment-docs.yml index 07b8ac9d..1babbe3e 100644 --- a/docs/environment-docs.yml +++ b/docs/environment-docs.yml @@ -4,14 +4,13 @@ channels: dependencies: - dask[dataframe] - decorator + - minimalkv - msgpack-python>=0.5.2 # Currently dask and numpy==1.16.0 clash - numpy!=1.15.0,!=1.16.0 - pandas>=0.23.0, !=1.0.0 - pyarrow>=0.17.1,!=1.0.0, <4 - simplejson - - simplekv - - storefact - toolz - typing_extensions # Some backports of the py3.8 typing module - urlquote>=1.1.3 
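For context: ``minimalkv`` merges the ``simplekv`` store API with ``storefact``'s URL-based store construction, which is why the two dependencies above collapse into one. A minimal sketch of the combined usage — assuming only minimalkv's documented ``get_store_from_url`` and its local-filesystem ``hfs://`` scheme, not code from this patch:

.. code-block:: python

    from functools import partial
    from tempfile import mkdtemp

    from minimalkv import get_store_from_url

    # The "h" prefix selects the extended keyspace, allowing "/" in key names.
    store_url = "hfs://" + mkdtemp()

    # Kartothek convention: pass a picklable, zero-argument store factory.
    store_factory = partial(get_store_from_url, store_url)

    store = store_factory()
    store.put("some/key", b"some value")
    assert store.get("some/key") == b"some value"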
diff --git a/docs/guide/cube/command_line_features.rst b/docs/guide/cube/command_line_features.rst index 129729a2..74093de4 100644 --- a/docs/guide/cube/command_line_features.rst +++ b/docs/guide/cube/command_line_features.rst @@ -17,7 +17,7 @@ Command Line Features Kartothek Cube also features a command line interface (CLI) for some cube operations. To use it, create a ``skv.yml`` file that -describes `storefact`_ stores: +describes `minimalkv`_ stores: .. code-block:: yaml @@ -147,5 +147,5 @@ Some information is not available when reading the schema information and requir Use ``kartothek_cube --help`` to get a list of all commands, or see :mod:`~kartothek.cli`. -.. _storefact: https://github.com/blue-yonder/storefact +.. _minimalkv: https://github.com/data-engineering-collective/minimalkv diff --git a/docs/guide/cube/examples.rst b/docs/guide/cube/examples.rst index 208936a2..3bebce3f 100644 --- a/docs/guide/cube/examples.rst +++ b/docs/guide/cube/examples.rst @@ -13,14 +13,14 @@ First, we want to create a cube for geodata: ... partition_columns=["country"], ... ) -Apart from an abstract cube definition, we need a `simplekv`_-based storage backend: +Apart from an abstract cube definition, we need a `minimalkv`_-based storage backend: >>> from functools import partial >>> import tempfile ->>> import storefact +>>> import minimalkv >>> store_location = tempfile.mkdtemp() >>> store_factory = partial( -... storefact.get_store_from_url, +... minimalkv.get_store_from_url, ... "hfs://" + store_location, ... ) >>> store = store_factory() @@ -424,4 +424,4 @@ geodata++time/table/_common_metadata .. _Dask: https://docs.dask.org/ .. _Dask.Bag: https://docs.dask.org/en/latest/bag.html .. _Dask.DataFrame: https://docs.dask.org/en/latest/dataframe.html -.. _simplekv: https://simplekv.readthedocs.io/ +.. _minimalkv: https://minimalkv.readthedocs.io/ diff --git a/docs/guide/cube/glossary.rst b/docs/guide/cube/glossary.rst index 3f6133a5..0fc7c674 100644 --- a/docs/guide/cube/glossary.rst +++ b/docs/guide/cube/glossary.rst @@ -55,8 +55,8 @@ Glossary Dataset that provides the groundtruth about which :term:`Cell` are in a :term:`Cube`. Store Factory - A callable that does not take any arguments and creates a new `simplekv`_ store when being called. Its type is - ``Callable[[], simplekv.KeyValueStore]``. + A callable that does not take any arguments and creates a new `minimalkv`_ store when being called. Its type is + ``Callable[[], minimalkv.KeyValueStore]``. Query A request for data from the cube, including things like "payload columns", "conditions", and more. @@ -76,4 +76,4 @@ Glossary .. _Data Cubes: https://en.wikipedia.org/wiki/Data_cube .. _Parquet: https://parquet.apache.org/ -.. _simplekv: https://simplekv.readthedocs.io/ \ No newline at end of file +.. _minimalkv: https://minimalkv.readthedocs.io/ diff --git a/docs/guide/getting_started.rst b/docs/guide/getting_started.rst index 8a80b1ba..f1ed8249 100644 --- a/docs/guide/getting_started.rst +++ b/docs/guide/getting_started.rst @@ -47,7 +47,7 @@ We want to store this DataFrame now as a dataset. Therefore, we first need to connect to a storage location. We define a store factory as a callable which contains the storage information. -We will use `storefact`_ in this example to construct such a store factory +We will use `minimalkv`_ in this example to construct such a store factory for the local filesystem (``hfs://`` indicates we are using the local filesystem and what follows is the filepath). @@ -55,7 +55,7 @@ what follows is the filepath). 
     from functools import partial
     from tempfile import TemporaryDirectory
-    from storefact import get_store_from_url
+    from minimalkv import get_store_from_url
 
     dataset_dir = TemporaryDirectory()
 
@@ -63,8 +63,8 @@ what follows is the filepath).
 
 .. admonition:: Storage locations
 
-    `storefact`_ offers support for several stores in Kartothek, these can be created using the
-    function `storefact.get_store_from_url` with one of the following prefixes:
+    `minimalkv`_ offers support for several stores in Kartothek; these can be created using the
+    function `minimalkv.get_store_from_url` with one of the following prefixes:
 
     - ``hfs``: Local filesystem
     - ``hazure``: AzureBlockBlobStorage
@@ -74,15 +74,11 @@ Interface
 ---------
 
 Kartothek can write to any location that
-fulfills the `simplekv.KeyValueStore interface
-`_ as long as they
-support `ExtendedKeyspaceMixin
-`_
+fulfills the `minimalkv.KeyValueStore interface
+`_ as long as it
+supports ``ExtendedKeyspaceMixin``
 (this is necessary so that ``/`` can be used in the storage key name).
 
-For more information, take a look out at the `storefact documentation
-`_.
-
 Writing data to storage
 =======================
 
@@ -232,5 +228,5 @@ function but returns a collection of ``dask.delayed`` objects.
 
     read_table("a_unique_dataset_identifier", store_url, predicates=[[("A", "<", 2.5)]])
 
-.. _storefact: https://github.com/blue-yonder/storefact
+.. _minimalkv: https://github.com/data-engineering-collective/minimalkv
 .. _dask: https://docs.dask.org/en/latest/
diff --git a/docs/guide/mutating_datasets.rst b/docs/guide/mutating_datasets.rst
index 25089d1a..f34a48f1 100644
--- a/docs/guide/mutating_datasets.rst
+++ b/docs/guide/mutating_datasets.rst
@@ -19,7 +19,7 @@ some data there with Kartothek.
     import pandas as pd
     from functools import partial
     from tempfile import TemporaryDirectory
-    from storefact import get_store_from_url
+    from minimalkv import get_store_from_url
 
     from kartothek.api.dataset import store_dataframes_as_dataset
 
@@ -236,7 +236,7 @@ When garbage collection is called, the files are removed.
 .. ipython:: python
 
     from kartothek.api.dataset import garbage_collect_dataset
-    from storefact import get_store_from_url
+    from minimalkv import get_store_from_url
 
     store = get_store_from_url(store_url)
 
@@ -246,7 +246,7 @@ When garbage collection is called, the files are removed.
 
     files_before.difference(store.keys())  # Show files removed
 
-.. _storefact: https://github.com/blue-yonder/storefact
+.. _minimalkv: https://github.com/data-engineering-collective/minimalkv
 
 
 Mutating indexed datasets
diff --git a/docs/guide/partitioning.rst b/docs/guide/partitioning.rst
index 25d1fc9a..e0a00bbd 100644
--- a/docs/guide/partitioning.rst
+++ b/docs/guide/partitioning.rst
@@ -30,7 +30,7 @@ first and store the data there with Kartothek:
     import pandas as pd
     from functools import partial
     from tempfile import TemporaryDirectory
-    from storefact import get_store_from_url
+    from minimalkv import get_store_from_url
 
     from kartothek.api.dataset import store_dataframes_as_dataset
 
diff --git a/docs/spec/store_interface.rst b/docs/spec/store_interface.rst
index df25a845..92d1c9e8 100644
--- a/docs/spec/store_interface.rst
+++ b/docs/spec/store_interface.rst
@@ -4,7 +4,7 @@
 KeyValueStore Interface
 =======================
 
-All storage interaction use ``simplekv.KeyValueStore`` as an storage layer
+All storage interactions use ``minimalkv.KeyValueStore`` as a storage layer
 abstraction. This allows convenient access to many different common Key-Value
 stores (ABS, S3, GCS, local filesystem, etc.)
 and allows an easy switch between the storage backends to facilitate a simpler
 test setup.
@@ -13,7 +13,7 @@ Generally, all of our public functions accepting a ``store`` argument accept a
 multitude of different input types and we generally accept all kinds of stores
 inheriting from ``KeyValueStore``, assuming they implement the pickle protocol.
 However, there are storages which simply cannot be distributed across processes
-or network nodes sensibly. A prime Example is the ``simplekv.memory.DictStore``
+or network nodes sensibly. A prime example is the ``minimalkv.memory.DictStore``
 which uses a simple python dictionary as a backend store. It is technically
 possible to (de-)serialize the store but once it is deserialized in another
 process, or another node, the store looses its meaning since the stores are
@@ -25,9 +25,8 @@ protocol, or some more complex logic is required to initialize it, kartothek
 also accepts _factories_ which must be a callable returning a ``KeyValueStore``
 (see also ``kartothek.core.typing.StoreFactory``).
 
-For convenience we also offer a `storefact`_ integration and accept store urls
+For convenience we also offer an integration that accepts store URLs
 which proves another easy level of access and is well suited for ad-hoc
 investigations.
 
-.. _simplekv: https://simplekv.readthedocs.io/
-.. _storefact: https://storefact.readthedocs.io/
\ No newline at end of file
+.. _minimalkv: https://minimalkv.readthedocs.io/
diff --git a/kartothek/cli/__init__.py b/kartothek/cli/__init__.py
index 8ad64838..feef0b9b 100644
--- a/kartothek/cli/__init__.py
+++ b/kartothek/cli/__init__.py
@@ -4,7 +4,7 @@
 
 .. important:: This module does not contain any public APIs.
 
-Kartothek comes with a CLI tool named ``kartothek_cube``. To use it, create an YAML file that contains a dictionary of `storefact`_
+Kartothek comes with a CLI tool named ``kartothek_cube``. To use it, create a YAML file that contains a dictionary of `minimalkv`_
 stores (keys are names of the store and the values are dicts that contain the store config). ``Kartothek`` uses a `YAML`_
 file called ``skv.yml`` and a store called ``dataset`` by default, but you may pass ``--skv`` and ``--store`` to change
 these. An example file could look like:
@@ -30,7 +30,7 @@
 
 .. _Dask: https://docs.dask.org/
-.. _storefact: https://github.com/blue-yonder/storefact
+.. _minimalkv: https://github.com/data-engineering-collective/minimalkv
 .. _YAML: https://yaml.org/
 """
 import logging
diff --git a/kartothek/cli/_utils.py b/kartothek/cli/_utils.py
index 638b7fee..41ef5333 100644
--- a/kartothek/cli/_utils.py
+++ b/kartothek/cli/_utils.py
@@ -2,7 +2,7 @@
 from functools import partial
 
 import click
-import storefact
+import minimalkv
 import yaml
 
 from kartothek.api.discover import discover_cube
@@ -18,7 +18,7 @@ def get_cube(store, uuid_prefix):
     ----------
     uuid_prefix: str
         Dataset UUID prefix.
-    store: Union[Callable[[], simplekv.KeyValueStore], simplekv.KeyValueStore]
+    store: Union[Callable[[], minimalkv.KeyValueStore], minimalkv.KeyValueStore]
         KV store.
 
     Returns
     -------
@@ -41,18 +41,18 @@ def get_store(skv, store):
     """
-    Get simplekv store from storefact config file.
+    Get minimalkv store from a store config file.
 
     Parameters
     ----------
     skv: str
-        Name of the storefact yaml. Normally ``'skv.yml'``.
+        Name of the store config YAML. Normally ``'skv.yml'``.
     store: str
         ID of the store.
 
     Returns
     -------
-    store_factory: Callable[[], simplekv.KeyValueStore]
+    store_factory: Callable[[], minimalkv.KeyValueStore]
         Store object.
Raises @@ -73,7 +73,7 @@ def get_store(skv, store): "Could not find store {store} in {skv}".format(store=store, skv=skv) ) - return partial(storefact.get_store, **store_cfg[store]) + return partial(minimalkv.get_store, **store_cfg[store]) def _match_pattern(what, items, pattern): diff --git a/kartothek/core/common_metadata.py b/kartothek/core/common_metadata.py index 2472dcc4..59f5d454 100644 --- a/kartothek/core/common_metadata.py +++ b/kartothek/core/common_metadata.py @@ -12,7 +12,7 @@ import pyarrow as pa import pyarrow.parquet as pq import simplejson -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.core import naming from kartothek.core._compat import load_json diff --git a/kartothek/core/docs.py b/kartothek/core/docs.py index 1c5c39b2..4287c3ab 100644 --- a/kartothek/core/docs.py +++ b/kartothek/core/docs.py @@ -7,11 +7,11 @@ _PARAMETER_MAPPING = { "store": """ - store: Callable or str or simplekv.KeyValueStore + store: Callable or str or minimalkv.KeyValueStore The store where we can find or store the dataset. - Can be either ``simplekv.KeyValueStore``, a storefact store url or a - generic Callable producing a ``simplekv.KeyValueStore``""", + Can be either ``minimalkv.KeyValueStore``, a minimalkv store url or a + generic Callable producing a ``minimalkv.KeyValueStore``""", "overwrite": """ overwrite: Optional[bool] If True, allow overwrite of an existing dataset.""", @@ -70,12 +70,12 @@ `merge_datasets__pipeline` key that contains the source dataset uuids for the merge.""", "output_store": """ - output_store : Union[Callable, str, simplekv.KeyValueStore] + output_store : Union[Callable, str, minimalkv.KeyValueStore] If given, the resulting dataset is written to this store. By default the input store. - Can be either `simplekv.KeyValueStore`, a storefact store url or a - generic Callable producing a ``simplekv.KeyValueStore``""", + Can be either `minimalkv.KeyValueStore`, a minimalkv store url or a + generic Callable producing a ``minimalkv.KeyValueStore``""", "metadata": """ metadata : Optional[Dict] A dictionary used to update the dataset metadata.""", diff --git a/kartothek/core/factory.py b/kartothek/core/factory.py index 59752e8d..582b2537 100644 --- a/kartothek/core/factory.py +++ b/kartothek/core/factory.py @@ -8,7 +8,7 @@ from kartothek.core.utils import lazy_store if TYPE_CHECKING: - from simplekv import KeyValueStore + from minimalkv import KeyValueStore __all__ = ("DatasetFactory",) @@ -38,7 +38,7 @@ def __init__( .. 
code:: from functools import partial - from storefact import get_store_from_url + from minimalkv import get_store_from_url from kartothek.io.eager import read_table ds_factory = DatasetFactory( diff --git a/kartothek/core/typing.py b/kartothek/core/typing.py index 828bd830..0d0ced2e 100644 --- a/kartothek/core/typing.py +++ b/kartothek/core/typing.py @@ -1,6 +1,6 @@ from typing import Callable, Union -from simplekv import KeyValueStore +from minimalkv import KeyValueStore StoreFactory = Callable[[], KeyValueStore] StoreInput = Union[str, KeyValueStore, StoreFactory] diff --git a/kartothek/core/utils.py b/kartothek/core/utils.py index ff15839a..2de33e5b 100644 --- a/kartothek/core/utils.py +++ b/kartothek/core/utils.py @@ -2,8 +2,7 @@ from functools import partial from typing import Any, Union, cast -from simplekv import KeyValueStore -from storefact import get_store_from_url +from minimalkv import KeyValueStore, get_store_from_url from kartothek.core.naming import MAX_METADATA_VERSION, MIN_METADATA_VERSION from kartothek.core.typing import StoreFactory, StoreInput @@ -50,11 +49,11 @@ def ensure_string_type(obj: Union[bytes, str]) -> str: return str(obj) -def _is_simplekv_key_value_store(obj: Any) -> bool: +def _is_minimalkv_key_value_store(obj: Any) -> bool: """ - Check whether ``obj`` is the ``simplekv.KeyValueStore``-like class. + Check whether ``obj`` is the ``minimalkv.KeyValueStore``-like class. - simplekv uses duck-typing, e.g. for decorators. Therefore, + minimalkv uses duck-typing, e.g. for decorators. Therefore, avoid `isinstance(store, KeyValueStore)`, as it would be unreliable. Instead, only roughly verify that `store` looks like a KeyValueStore. """ @@ -67,7 +66,7 @@ def ensure_store(store: StoreInput) -> KeyValueStore: """ # This function is often used in an eager context where we may allow # non-serializable stores, so skip the pickle test. - if _is_simplekv_key_value_store(store): + if _is_minimalkv_key_value_store(store): return store return lazy_store(store)() @@ -97,7 +96,7 @@ def lazy_store(store: StoreInput) -> StoreFactory: return ret_val else: - if not _is_simplekv_key_value_store(store): + if not _is_minimalkv_key_value_store(store): raise TypeError( f"Provided incompatible store type. Got {type(store)} but expected {StoreInput}." ) diff --git a/kartothek/io/dask/bag_cube.py b/kartothek/io/dask/bag_cube.py index c21cd403..d1672eda 100644 --- a/kartothek/io/dask/bag_cube.py +++ b/kartothek/io/dask/bag_cube.py @@ -4,7 +4,7 @@ from typing import Any, Dict, Iterable, Optional, Union import dask.bag as db -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.api.discover import discover_datasets_unchecked from kartothek.core.cube.cube import Cube @@ -167,7 +167,7 @@ def query_cube_bag( ---------- cube: Cube Cube specification. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore KV store that preserves the cube. conditions: Union[None, Condition, Iterable[Condition], Conjunction] Conditions that should be applied, optional. 
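The ``lazy_store``/``ensure_store`` helpers changed above normalize every form of ``StoreInput`` (``Union[str, KeyValueStore, StoreFactory]`` per ``kartothek.core.typing``). A sketch of the three accepted forms — illustrative only; the assertions are assumptions for the example, not tests from this repository:

.. code-block:: python

    from functools import partial
    from tempfile import mkdtemp

    from minimalkv import get_store_from_url

    from kartothek.core.utils import ensure_store, lazy_store

    url = "hfs://" + mkdtemp()

    as_url = url                                   # minimalkv store URL
    as_store = get_store_from_url(url)             # live KeyValueStore
    as_factory = partial(get_store_from_url, url)  # Callable[[], KeyValueStore]

    # lazy_store turns a URL or a factory into a picklable store factory ...
    assert callable(lazy_store(as_url))
    assert callable(lazy_store(as_factory))

    # ... while ensure_store always yields a usable store; the duck-typing
    # check _is_minimalkv_key_value_store lets live instances pass through.
    for store_input in (as_url, as_store, as_factory):
        ensure_store(store_input).put("probe", b"ok")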
diff --git a/kartothek/io/dask/common_cube.py b/kartothek/io/dask/common_cube.py index f059afab..7d026dc4 100644 --- a/kartothek/io/dask/common_cube.py +++ b/kartothek/io/dask/common_cube.py @@ -6,7 +6,7 @@ from typing import Any, Dict, Iterable, Mapping, Optional, Set import dask.bag as db -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.api.consistency import get_cube_payload from kartothek.api.discover import discover_datasets, discover_datasets_unchecked @@ -298,7 +298,7 @@ def query_cube_bag_internal( ---------- cube: Cube Cube specification. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore KV store that preserves the cube. conditions: Union[None, Condition, Iterable[Condition], Conjunction] Conditions that should be applied, optional. diff --git a/kartothek/io/dask/dataframe_cube.py b/kartothek/io/dask/dataframe_cube.py index 07f21daa..76529bd9 100644 --- a/kartothek/io/dask/dataframe_cube.py +++ b/kartothek/io/dask/dataframe_cube.py @@ -7,7 +7,7 @@ import dask.bag as db import dask.dataframe as dd from dask.delayed import Delayed -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.api.discover import discover_datasets_unchecked from kartothek.core.cube.cube import Cube @@ -210,7 +210,7 @@ def query_cube_dataframe( ---------- cube: Cube Cube specification. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore KV store that preserves the cube. conditions: Union[None, Condition, Iterable[Condition], Conjunction] Conditions that should be applied, optional. diff --git a/kartothek/io/eager.py b/kartothek/io/eager.py index 6ac0f531..381fc650 100644 --- a/kartothek/io/eager.py +++ b/kartothek/io/eager.py @@ -2,7 +2,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast import pandas as pd -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.core.common_metadata import ( empty_dataframe_from_schema, @@ -121,10 +121,10 @@ def read_dataset_as_dataframes( .. code :: - >>> import storefact + >>> import minimalkv >>> from kartothek.io.eager import read_dataset_as_dataframes - >>> store = storefact.get_store_from_url('s3://bucket_with_dataset') + >>> store = minimalkv.get_store_from_url('s3://bucket_with_dataset') >>> dfs = read_dataset_as_dataframes('dataset_uuid', store, 'core') @@ -177,10 +177,10 @@ def read_dataset_as_metapartitions( .. code :: - >>> import storefact + >>> import minimalkv >>> from kartothek.io.eager import read_dataset_as_dataframe - >>> store = storefact.get_store_from_url('s3://bucket_with_dataset') + >>> store = minimalkv.get_store_from_url('s3://bucket_with_dataset') >>> list_mps = read_dataset_as_metapartitions('dataset_uuid', store, 'core') @@ -235,10 +235,10 @@ def read_table( .. 
code :: - >>> import storefact + >>> import minimalkv >>> from kartothek.io.eager import read_table - >>> store = storefact.get_store_from_url('s3://bucket_with_dataset') + >>> store = minimalkv.get_store_from_url('s3://bucket_with_dataset') >>> df = read_table(store, 'dataset_uuid', 'core') diff --git a/kartothek/io/eager_cube.py b/kartothek/io/eager_cube.py index 34a7747e..1791a85b 100644 --- a/kartothek/io/eager_cube.py +++ b/kartothek/io/eager_cube.py @@ -4,9 +4,9 @@ from collections import defaultdict from typing import Any, Dict, Iterable, List, Optional, Sequence, Union +import minimalkv import pandas as pd -import simplekv -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.api.consistency import get_cube_payload from kartothek.api.discover import discover_datasets, discover_datasets_unchecked @@ -338,7 +338,7 @@ def query_cube( ---------- cube: Cube Cube specification. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore KV store that preserves the cube. conditions: Union[None, Condition, Iterable[Condition], Conjunction] Conditions that should be applied, optional. @@ -394,7 +394,7 @@ def delete_cube(cube, store, datasets=None): ---------- cube: Cube Cube specification. - store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] KV store. datasets: Union[None, Iterable[str], Dict[str, kartothek.core.dataset.DatasetMetadata]] Datasets to delete, must all be part of the cube. May be either the result of :func:`~kartothek.api.discover.discover_datasets`, a list @@ -427,9 +427,9 @@ def copy_cube(cube, src_store, tgt_store, overwrite=False, datasets=None): ---------- cube: Cube Cube specification. - src_store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + src_store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] Source KV store. - tgt_store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + tgt_store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] Target KV store. overwrite: bool If possibly existing datasets in the target store should be overwritten. @@ -463,7 +463,7 @@ def collect_stats(cube, store, datasets=None): ---------- cube: Cube Cube specification. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore KV store that preserves the cube. datasets: Union[None, Iterable[str], Dict[str, kartothek.core.dataset.DatasetMetadata]] Datasets to query, must all be part of the cube. May be either the result of :func:`~kartothek.api.discover.discover_datasets`, a list @@ -501,7 +501,7 @@ def cleanup_cube(cube, store): ---------- cube: Cube Cube specification. - store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] KV store. """ if callable(store): @@ -516,7 +516,7 @@ def cleanup_cube(cube, store): def remove_partitions( cube: Cube, - store: Union[simplekv.KeyValueStore, StoreFactory], + store: Union[minimalkv.KeyValueStore, StoreFactory], conditions: Union[None, Condition, Sequence[Condition], Conjunction] = None, ktk_cube_dataset_ids: Optional[Union[Sequence[str], str]] = None, metadata: Optional[Dict[str, Dict[str, Any]]] = None, diff --git a/kartothek/io/iter.py b/kartothek/io/iter.py index df0ebce1..6628fa81 100644 --- a/kartothek/io/iter.py +++ b/kartothek/io/iter.py @@ -130,10 +130,10 @@ def read_dataset_as_dataframes__iterator( .. 
code ::
 
-        >>> import storefact
+        >>> import minimalkv
 
         >>> from kartothek.io.iter import read_dataset_as_dataframes__iterator
 
-        >>> store = storefact.get_store_from_url('s3://bucket_with_dataset')
+        >>> store = minimalkv.get_store_from_url('s3://bucket_with_dataset')
 
         >>> dataframes = read_dataset_as_dataframes__iterator('dataset_uuid', store)
         >>> next(dataframes)
diff --git a/kartothek/io/testing/read.py b/kartothek/io/testing/read.py
index 073fc06b..18470226 100644
--- a/kartothek/io/testing/read.py
+++ b/kartothek/io/testing/read.py
@@ -34,7 +34,7 @@
 import pandas as pd
 import pandas.testing as pdt
 import pytest
-from storefact import get_store_from_url
+from minimalkv import get_store_from_url
 
 from kartothek.io.eager import store_dataframes_as_dataset
 from kartothek.io.iter import store_dataframes_as_dataset__iter
diff --git a/kartothek/io/testing/write.py b/kartothek/io/testing/write.py
index 1995a945..1a376321 100644
--- a/kartothek/io/testing/write.py
+++ b/kartothek/io/testing/write.py
@@ -9,7 +9,7 @@
 import pandas as pd
 import pandas.testing as pdt
 import pytest
-from storefact import get_store_from_url
+from minimalkv import get_store_from_url
 
 from kartothek.core.dataset import DatasetMetadata
 from kartothek.core.uuid import gen_uuid
diff --git a/kartothek/io_components/cube/cleanup.py b/kartothek/io_components/cube/cleanup.py
index 0fc45de9..35cf33ca 100644
--- a/kartothek/io_components/cube/cleanup.py
+++ b/kartothek/io_components/cube/cleanup.py
@@ -12,7 +12,7 @@ def get_keys_to_clean(cube_uuid_prefix, datasets, store):
 
     Parameters
     ----------
-    store: simplekv.KeyValueStore
+    store: minimalkv.KeyValueStore
         KV store.
     datasets: Dict[str, kartothek.core.dataset.DatasetMetadata]
         Datasets to scan for keys.
diff --git a/kartothek/io_components/cube/common.py b/kartothek/io_components/cube/common.py
index 9bc9660f..9525f57a 100644
--- a/kartothek/io_components/cube/common.py
+++ b/kartothek/io_components/cube/common.py
@@ -29,13 +29,13 @@ def assert_stores_different(store1, store2, prefix):
     """
     Check that given stores are different.
 
-    This is a workaround for tha fact that simplekv stores normally do not implemenent some sane equality check.
+    This is a workaround for the fact that minimalkv stores normally do not implement a sane equality check.
 
     Parameters
     ----------
-    store1: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]]
+    store1: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]]
         First store.
-    store2: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]]
+    store2: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]]
         Second store, will be used to write a test key to.
     prefix: str
         Prefix to be used for the temporary key used for the equality check.
diff --git a/kartothek/io_components/cube/copy.py b/kartothek/io_components/cube/copy.py
index decc21a4..550eb5bb 100644
--- a/kartothek/io_components/cube/copy.py
+++ b/kartothek/io_components/cube/copy.py
@@ -16,9 +16,9 @@ def get_copy_keys(cube, src_store, tgt_store, overwrite, datasets=None):
     ----------
     cube: kartothek.core.cube.cube.Cube
         Cube specification.
-    src_store: Union[Callable[[], simplekv.KeyValueStore], simplekv.KeyValueStore]
+    src_store: Union[Callable[[], minimalkv.KeyValueStore], minimalkv.KeyValueStore]
         Source KV store.
-    tgt_store: Union[Callable[[], simplekv.KeyValueStore], simplekv.KeyValueStore]
+    tgt_store: Union[Callable[[], minimalkv.KeyValueStore], minimalkv.KeyValueStore]
         Target KV store.
overwrite: bool If possibly existing datasets in the target store should be overwritten. diff --git a/kartothek/io_components/cube/query/__init__.py b/kartothek/io_components/cube/query/__init__.py index a60410e2..9465b174 100644 --- a/kartothek/io_components/cube/query/__init__.py +++ b/kartothek/io_components/cube/query/__init__.py @@ -59,7 +59,7 @@ def _load_required_explicit_indices(datasets, intention, store): Available datasets. intention: kartothek.io_components.cube.query._intention.QueryIntention Query intention. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore Store to query from. Returns @@ -279,7 +279,7 @@ def plan_query( By which column logical partitions should be formed. payload_columns: Optional[Iterable[str]] Which columns apart from ``dimension_columns`` and ``partition_by`` should be returned. - store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] Store to query from. Returns diff --git a/kartothek/io_components/cube/query/_group.py b/kartothek/io_components/cube/query/_group.py index c50d7237..2d8ea4bb 100644 --- a/kartothek/io_components/cube/query/_group.py +++ b/kartothek/io_components/cube/query/_group.py @@ -66,7 +66,7 @@ def _load_all_mps(mps, store, load_columns, predicates, empty): ---------- mps: Iterable[MetaPartition] MetaPartitions to load. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore Store to load data from. load_columns: List[str] Columns to load. @@ -108,7 +108,7 @@ def _load_partition_dfs(cube, group, partition_mps, store): Query group. partition_mps: Dict[str, Iterable[MetaPartition]] MetaPartitions for every dataset in this partition. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore Store to load data from. Returns @@ -168,7 +168,7 @@ def _load_partition(cube, group, partition_mps, store): Query group. partition_mps: Dict[str, Iterable[MetaPartition]] MetaPartitions for every dataset in this partition. - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore Store to load data from. Returns @@ -199,7 +199,7 @@ def load_group(group, store, cube): ---------- group: QueryGroup Query group. - store: Union[Callable[[], simplekv.KeyValueStore], simplekv.KeyValueStore] + store: Union[Callable[[], minimalkv.KeyValueStore], minimalkv.KeyValueStore] Store to load data from. cube: kartothek.core.cube.cube.Cube Cube specification. diff --git a/kartothek/io_components/cube/remove.py b/kartothek/io_components/cube/remove.py index a6e3fb3e..4f3aaa24 100644 --- a/kartothek/io_components/cube/remove.py +++ b/kartothek/io_components/cube/remove.py @@ -22,7 +22,7 @@ def prepare_metapartitions_for_removal_action( ---------- cube: kartothek.core.cube.cube.Cube Cube spec. - store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] Store. conditions: Union[None, Condition, Iterable[Condition], Conjunction] Conditions that should be applied, optional. Defaults to "entire cube". diff --git a/kartothek/io_components/cube/stats.py b/kartothek/io_components/cube/stats.py index 3e77acec..809981d2 100644 --- a/kartothek/io_components/cube/stats.py +++ b/kartothek/io_components/cube/stats.py @@ -73,7 +73,7 @@ def collect_stats_block(metapartitions, store): ---------- metapartitions: Tuple[Tuple[str, Tuple[kartothek.io_components.metapartition.MetaPartition, ...]], ...] Part of the result of :meth:`get_metapartitions_for_stats`. 
- store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] KV store. Returns diff --git a/kartothek/io_components/cube/write.py b/kartothek/io_components/cube/write.py index b879f7a9..2c61c4e6 100644 --- a/kartothek/io_components/cube/write.py +++ b/kartothek/io_components/cube/write.py @@ -416,7 +416,7 @@ def apply_postwrite_checks(datasets, cube, store, existing_datasets): Datasets that just got written. cube: kartothek.core.cube.cube.Cube Cube specification. - store: Union[Callable[[], simplekv.KeyValueStore], simplekv.KeyValueStore] + store: Union[Callable[[], minimalkv.KeyValueStore], minimalkv.KeyValueStore] KV store. existing_datasets: Dict[str, kartothek.core.dataset.DatasetMetadata] Datasets that were present before the write procedure started. @@ -537,7 +537,7 @@ def _rollback_transaction(existing_datasets, new_datasets, store): Datasets that existings before the write process started. new_datasets: Dict[str, kartothek.core.dataset.DatasetMetadata] Datasets that where created / changed during the write process. - store: Union[Callable[[], simplekv.KeyValueStore], simplekv.KeyValueStore] + store: Union[Callable[[], minimalkv.KeyValueStore], minimalkv.KeyValueStore] KV store. """ if callable(store): diff --git a/kartothek/io_components/metapartition.py b/kartothek/io_components/metapartition.py index 871b4da2..2ac2ee39 100644 --- a/kartothek/io_components/metapartition.py +++ b/kartothek/io_components/metapartition.py @@ -24,7 +24,7 @@ import numpy as np import pandas as pd import pyarrow as pa -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.core import naming from kartothek.core.common_metadata import ( diff --git a/kartothek/io_components/write.py b/kartothek/io_components/write.py index 874766a0..33a60127 100644 --- a/kartothek/io_components/write.py +++ b/kartothek/io_components/write.py @@ -1,7 +1,7 @@ from functools import partial from typing import Dict, Iterable, List, Optional, cast -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.core import naming from kartothek.core.common_metadata import ( diff --git a/kartothek/serialization/_csv.py b/kartothek/serialization/_csv.py index d50d032f..6d9d6401 100644 --- a/kartothek/serialization/_csv.py +++ b/kartothek/serialization/_csv.py @@ -10,8 +10,8 @@ import pandas as pd import pyarrow as pa +from minimalkv import KeyValueStore from pandas.errors import EmptyDataError -from simplekv import KeyValueStore from ._generic import ( DataFrameSerializer, diff --git a/kartothek/serialization/_generic.py b/kartothek/serialization/_generic.py index 931df72b..c54bf0af 100644 --- a/kartothek/serialization/_generic.py +++ b/kartothek/serialization/_generic.py @@ -19,8 +19,8 @@ import numpy as np import pandas as pd +from minimalkv import KeyValueStore from pandas.api.types import is_list_like -from simplekv import KeyValueStore from kartothek.serialization._util import _check_contains_null @@ -37,7 +37,7 @@ class DataFrameSerializer: """ Abstract class that supports serializing DataFrames to/from - simplekv stores. + minimalkv stores. 
:meta public: """ @@ -137,7 +137,7 @@ def store(self, store: KeyValueStore, key_prefix: str, df: pd.DataFrame) -> str: Parameters ---------- - store: simplekv.KeyValueStore + store: minimalkv.KeyValueStore store engine key_prefix: str Key prefix that specifies a path where object should be diff --git a/kartothek/serialization/_parquet.py b/kartothek/serialization/_parquet.py index 41c550a5..87554928 100644 --- a/kartothek/serialization/_parquet.py +++ b/kartothek/serialization/_parquet.py @@ -13,8 +13,8 @@ import pandas as pd import pyarrow as pa import pyarrow.parquet as pq +from minimalkv import KeyValueStore from pyarrow.parquet import ParquetFile -from simplekv import KeyValueStore from ._generic import ( DataFrameSerializer, @@ -28,7 +28,7 @@ try: # Only check for BotoStore instance if boto is really installed - from simplekv.net.botostore import BotoStore + from minimalkv.net.botostore import BotoStore HAVE_BOTO = True except ImportError: diff --git a/kartothek/utils/ktk_adapters.py b/kartothek/utils/ktk_adapters.py index 38246119..abca7499 100644 --- a/kartothek/utils/ktk_adapters.py +++ b/kartothek/utils/ktk_adapters.py @@ -5,7 +5,7 @@ import pandas as pd import pyarrow.parquet as pq -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.core.factory import DatasetFactory from kartothek.core.index import ExplicitSecondaryIndex @@ -108,7 +108,7 @@ def metadata_factory_from_dataset(dataset, with_schema=True, store=None): Already loaded dataset. with_schema: bool If dataset was loaded with ``load_schema``. - store: Optional[Callable[[], simplekv.KeyValueStore]] + store: Optional[Callable[[], minimalkv.KeyValueStore]] Optional store factory. Returns @@ -134,7 +134,7 @@ def get_physical_partition_stats(metapartitions, store): ---------- metapartitions: Iterable[kartothek.io_components.metapartition.MetaPartition] Iterable of metapartitions belonging to the same physical partition. - store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] KV store. Returns diff --git a/kartothek/utils/store.py b/kartothek/utils/store.py index bae3474a..d7e31204 100644 --- a/kartothek/utils/store.py +++ b/kartothek/utils/store.py @@ -1,11 +1,11 @@ """ -Workarounds for limitations of the simplekv API. +Workarounds for limitations of the minimalkv API. """ import logging import time from urllib.parse import quote -from simplekv.contrib import VALID_KEY_RE_EXTENDED +from minimalkv.contrib import VALID_KEY_RE_EXTENDED try: # azure-storage-blob < 12 @@ -194,9 +194,9 @@ def copy_keys(keys, src_store, tgt_store): ---------- keys: Iterable[str] Keys to copy. - src_store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + src_store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] Source KV store. - tgt_store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]] + tgt_store: Union[minimalkv.KeyValueStore, Callable[[], minimalkv.KeyValueStore]] Target KV store. 
""" if callable(src_store): diff --git a/reference-data/arrow-compat/generate_reference.py b/reference-data/arrow-compat/generate_reference.py index fe6ddadc..11daa966 100755 --- a/reference-data/arrow-compat/generate_reference.py +++ b/reference-data/arrow-compat/generate_reference.py @@ -2,7 +2,7 @@ import os import pyarrow as pa -from storefact import get_store_from_url +from minimalkv import get_store_from_url from kartothek.core.testing import get_dataframe_alltypes from kartothek.serialization import ParquetSerializer diff --git a/requirements.txt b/requirements.txt index b6d10a31..007026c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,12 @@ dask[dataframe] decorator +minimalkv msgpack>=0.5.2 # Currently dask and numpy==1.16.0 clash numpy!=1.15.0,!=1.16.0 pandas>=0.23.0, !=1.0.0 pyarrow>=0.17.1,!=1.0.0, <4 simplejson -simplekv -storefact toolz typing_extensions; python_version<"3.8" # Some backports of the py3.8 typing module urlquote>=1.1.3 diff --git a/tests/api/test_discover.py b/tests/api/test_discover.py index 7320a71a..65278cae 100644 --- a/tests/api/test_discover.py +++ b/tests/api/test_discover.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.api.discover import ( discover_cube, diff --git a/tests/cli/conftest.py b/tests/cli/conftest.py index 0b00a2d7..1cd9c40b 100644 --- a/tests/cli/conftest.py +++ b/tests/cli/conftest.py @@ -3,10 +3,10 @@ import os from datetime import datetime +import minimalkv import numpy as np import pandas as pd import pytest -import storefact from click.testing import CliRunner from freezegun import freeze_time @@ -67,7 +67,7 @@ def storecfg2(): @pytest.fixture def azurestorecfg(azure_store_cfg_factory): cfg = azure_store_cfg_factory("cli") - store = storefact.get_store(**cfg) + store = minimalkv.get_store(**cfg) for k in list(store.keys()): store.delete(k) return cfg @@ -76,7 +76,7 @@ def azurestorecfg(azure_store_cfg_factory): @pytest.fixture def azurestorecfg2(azure_store_cfg_factory): cfg = azure_store_cfg_factory("cli2") - store = storefact.get_store(**cfg) + store = minimalkv.get_store(**cfg) for k in list(store.keys()): store.delete(k) return cfg @@ -84,17 +84,17 @@ def azurestorecfg2(azure_store_cfg_factory): @pytest.fixture def store(storecfg): - return storefact.get_store(**storecfg) + return minimalkv.get_store(**storecfg) @pytest.fixture def store2(storecfg2): - return storefact.get_store(**storecfg2) + return minimalkv.get_store(**storecfg2) @pytest.fixture def azurestore(azurestorecfg): - store = storefact.get_store(**azurestorecfg) + store = minimalkv.get_store(**azurestorecfg) yield store # prevent ResourceWarning gc.collect() @@ -103,7 +103,7 @@ def azurestore(azurestorecfg): @pytest.fixture def azurestore2(azurestorecfg2): - store = storefact.get_store(**azurestorecfg2) + store = minimalkv.get_store(**azurestorecfg2) yield store # prevent ResourceWarning gc.collect() diff --git a/tests/conftest.py b/tests/conftest.py index be0d9c19..71488f7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,7 @@ import pandas as pd import pytest -import storefact +import minimalkv # fmt: off pytest.register_assert_rewrite("kartothek.io.testing") @@ -114,7 +114,7 @@ def _refuse_write(*args, **kwargs): def _get_store(path): url = "hfs://{}".format(path) - store = storefact.get_store_from_url(url) + store = minimalkv.get_store_from_url(url) store.delete = partial(_check_and_delete, store=store, delete_orig=store.delete) return 
store diff --git a/tests/core/test_dataset_dyn_part.py b/tests/core/test_dataset_dyn_part.py index a4dbb4c6..81ebf983 100644 --- a/tests/core/test_dataset_dyn_part.py +++ b/tests/core/test_dataset_dyn_part.py @@ -3,10 +3,10 @@ import tempfile from urllib.parse import quote +import minimalkv import numpy as np import pandas as pd import simplejson -import storefact from kartothek.core.common_metadata import ( _get_common_metadata_key, @@ -317,7 +317,7 @@ def test_dask_partitions(metadata_version): os.mkdir("{}/{}".format(bucket_dir, dataset_uuid)) table_dir = "{}/{}/core".format(bucket_dir, dataset_uuid) os.mkdir(table_dir) - store = storefact.get_store_from_url("hfs://{}".format(bucket_dir)) + store = minimalkv.get_store_from_url("hfs://{}".format(bucket_dir)) locations = ["L-{}".format(i) for i in range(2)] df = pd.DataFrame() diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 98f94bf4..e8bd4a65 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -1,9 +1,8 @@ from functools import partial import pytest -from simplekv import KeyValueStore -from simplekv.decorator import PrefixDecorator -from storefact import get_store_from_url +from minimalkv import KeyValueStore, get_store_from_url +from minimalkv.decorator import PrefixDecorator from kartothek.core.utils import ensure_store, lazy_store diff --git a/tests/io_components/test_dataset_metadata_factory.py b/tests/io_components/test_dataset_metadata_factory.py index 2d7c57b8..3e35a779 100644 --- a/tests/io_components/test_dataset_metadata_factory.py +++ b/tests/io_components/test_dataset_metadata_factory.py @@ -3,7 +3,7 @@ from functools import partial import pytest -from simplekv import KeyValueStore +from minimalkv import KeyValueStore from kartothek.core.factory import DatasetFactory diff --git a/tests/io_components/test_utils.py b/tests/io_components/test_utils.py index f17fc5b6..1ebaf002 100644 --- a/tests/io_components/test_utils.py +++ b/tests/io_components/test_utils.py @@ -6,7 +6,7 @@ import pandas.testing as pdt import pyarrow as pa import pytest -from storefact import get_store_from_url +from minimalkv import get_store_from_url from kartothek.io_components.utils import ( align_categories, @@ -92,7 +92,7 @@ def func(arg1, partition_on, delete_scope=None): assert (expected[0], expected[1], []) == func(test_arg1, test_partition_on) -@pytest.mark.parametrize("_type", ["callable", "url", "simplekv"]) +@pytest.mark.parametrize("_type", ["callable", "url", "minimalkv"]) def test_normalize_store(tmpdir, _type): store_url = f"hfs://{tmpdir}" @@ -108,7 +108,7 @@ def func(store): store_test = partial(get_store_from_url, store_url) elif _type == "url": store_test = store_url - elif _type == "simplekv": + elif _type == "minimalkv": store_test = store else: raise AssertionError(f"unknown parametrization {_type}") diff --git a/tests/serialization/test_arrow_compat.py b/tests/serialization/test_arrow_compat.py index e9bf7714..7b2e6e34 100644 --- a/tests/serialization/test_arrow_compat.py +++ b/tests/serialization/test_arrow_compat.py @@ -6,7 +6,7 @@ import pandas.testing as pdt import pytest -from storefact import get_store_from_url +from minimalkv import get_store_from_url from kartothek.core.testing import get_dataframe_alltypes from kartothek.serialization import ParquetSerializer diff --git a/tests/serialization/test_parquet.py b/tests/serialization/test_parquet.py index cad38c8d..a40795d3 100644 --- a/tests/serialization/test_parquet.py +++ b/tests/serialization/test_parquet.py @@ -1,12 +1,12 @@ import 
os from datetime import date, datetime +import minimalkv import numpy as np import pandas as pd import pandas.testing as pdt import pyarrow as pa import pytest -import storefact from pyarrow.parquet import ParquetFile from kartothek.serialization import DataFrameSerializer, ParquetSerializer @@ -27,7 +27,7 @@ def reference_store(): "reference-data", "pyarrow-bugs", ) - return storefact.get_store_from_url("hfs://{}".format(path)) + return minimalkv.get_store_from_url("hfs://{}".format(path)) def test_timestamp_us(store): diff --git a/tests/utils/test_store.py b/tests/utils/test_store.py index a6c90aa9..4dd69352 100644 --- a/tests/utils/test_store.py +++ b/tests/utils/test_store.py @@ -2,8 +2,8 @@ import base64 import hashlib +import minimalkv import pytest -import storefact from kartothek.utils.store import ( _azure_bbs_content_md5, @@ -33,7 +33,7 @@ def _gen_store(store_type, tmpdir, suffix, azure_store_cfg_factory): else: raise ValueError("Unknown store type: {}".format(store_type)) - store = storefact.get_store(**cfg) + store = minimalkv.get_store(**cfg) for k in store.keys(): store.delete(k) @@ -67,7 +67,7 @@ def store2(request, tmpdir, azure_store_cfg_factory): ) def test_azure_implementation(azure_store_cfg_factory): cfg = azure_store_cfg_factory("ts") - store = storefact.get_store(**cfg) + store = minimalkv.get_store(**cfg) assert _has_azure_bbs(store) content = b"foo" store.put("key0", content) @@ -85,7 +85,7 @@ def test_azure_implementation(azure_store_cfg_factory): ) def test_azure12_implementation(azure_store_cfg_factory): cfg = azure_store_cfg_factory("ts") - store = storefact.get_store(**cfg) + store = minimalkv.get_store(**cfg) assert _has_azure_cc(store) content = b"foobar" store.put("key0", content)
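The ``copy_keys`` helper exercised above keeps its documented signature: an iterable of keys plus source and target stores (or zero-argument store factories). A round-trip sketch — assuming minimalkv's in-memory ``hmemory://`` scheme (extended keyspace, so ``/`` is valid in keys); not a test from this repository:

.. code-block:: python

    from minimalkv import get_store_from_url

    from kartothek.utils.store import copy_keys

    # Two independent in-memory stores standing in for real backends.
    src = get_store_from_url("hmemory://")
    tgt = get_store_from_url("hmemory://")

    key = "dataset_uuid/table/part-0.parquet"
    src.put(key, b"\x00" * 8)

    copy_keys({key}, src, tgt)
    assert tgt.get(key) == b"\x00" * 8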