From 72260ba3935530bee8a284284fcb9dc4d51322ee Mon Sep 17 00:00:00 2001 From: faph Date: Sat, 16 Sep 2023 13:00:40 +0100 Subject: [PATCH] Deserialize many to return an iterator --- src/py_adapter/__init__.py | 8 ++++---- src/py_adapter/plugin/__init__.py | 7 ++++--- src/py_adapter/plugin/_avro.py | 11 ++++++----- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/py_adapter/__init__.py b/src/py_adapter/__init__.py index f7f4c54..25eff37 100644 --- a/src/py_adapter/__init__.py +++ b/src/py_adapter/__init__.py @@ -22,6 +22,7 @@ import inspect import logging import uuid +from collections.abc import Iterator from typing import ( Any, Callable, @@ -142,10 +143,9 @@ def deserialize(data: bytes, py_type: Type[Obj], *, format: str, writer_schema: return obj -# TODO: return generator instead -def deserialize_many(data: bytes, py_type: Type[Obj], *, format: str, writer_schema: bytes = b"") -> List[Obj]: +def deserialize_many(data: bytes, py_type: Type[Obj], *, format: str, writer_schema: bytes = b"") -> Iterator[Obj]: """ - Deserialize bytes as a list of Python objects of a given type from a serialization format supported by + Deserialize bytes as an iterator over Python objects of a given type from a serialization format supported by **py-adapter** :param data: Serialized data @@ -155,7 +155,7 @@ def deserialize_many(data: bytes, py_type: Type[Obj], *, format: str, writer_sch """ deserialize_fn = py_adapter.plugin.plugin_hook(format, "deserialize_many") basic_objs = deserialize_fn(data=data, writer_schema=writer_schema) - objs = [from_basic_type(basic_obj, py_type) for basic_obj in basic_objs] + objs = (from_basic_type(basic_obj, py_type) for basic_obj in basic_objs) return objs diff --git a/src/py_adapter/plugin/__init__.py b/src/py_adapter/plugin/__init__.py index d678701..0a3f8e2 100644 --- a/src/py_adapter/plugin/__init__.py +++ b/src/py_adapter/plugin/__init__.py @@ -16,7 +16,8 @@ import functools import logging import sys -from typing import TYPE_CHECKING, List, Sequence +from collections.abc import Iterator +from typing import TYPE_CHECKING, Sequence import pluggy @@ -135,9 +136,9 @@ def deserialize(data: bytes, writer_schema: bytes) -> "py_adapter.Basic": @_hookspec(firstresult=True) -def deserialize_many(data: bytes, writer_schema: bytes) -> List["py_adapter.Basic"]: +def deserialize_many(data: bytes, writer_schema: bytes) -> Iterator["py_adapter.Basic"]: """ - Hook specification. Deserialize data as a list of objects of basic Python types + Hook specification. Deserialize data as an iterator over objects of basic Python types :param data: Bytes to deserialize :param writer_schema: Data schema used to serialize the data with, as JSON bytes. diff --git a/src/py_adapter/plugin/_avro.py b/src/py_adapter/plugin/_avro.py index 76e527c..08ab48d 100644 --- a/src/py_adapter/plugin/_avro.py +++ b/src/py_adapter/plugin/_avro.py @@ -14,7 +14,8 @@ """ import io -from typing import List, Sequence +from collections.abc import Iterator +from typing import Sequence import orjson @@ -78,14 +79,14 @@ def deserialize(data: bytes, writer_schema: bytes) -> py_adapter.Basic: data_stream = io.BytesIO(data) # TODO: add support for reader schema, if provided # TODO: add support for reader of data with embedded (writer) schema - basic_obj = fastavro.read.schemaless_reader(data_stream, writer_schema=writer_schema_obj) + basic_obj = fastavro.read.schemaless_reader(data_stream, writer_schema=writer_schema_obj, reader_schema=None) return basic_obj @py_adapter.plugin.hook -def deserialize_many(data: bytes, writer_schema: bytes) -> List[py_adapter.Basic]: +def deserialize_many(data: bytes, writer_schema: bytes) -> Iterator[py_adapter.Basic]: """ - Deserialize Avro container file format data as a list of objects of basic Python types + Deserialize Avro container file format data as an iterator over objects of basic Python types :param data: Bytes to deserialize :param writer_schema: Data schema used to serialize the data with, as JSON bytes. @@ -95,5 +96,5 @@ def deserialize_many(data: bytes, writer_schema: bytes) -> List[py_adapter.Basic # TODO: make it fail if writer_schema is provided? data_stream = io.BytesIO(data) # TODO: add support for reader schema, if provided - basic_objs = list(fastavro.read.reader(data_stream, reader_schema=None)) + basic_objs = fastavro.read.reader(data_stream, reader_schema=None) return basic_objs