Skip to content

Commit

Permalink
Deserialize many to return an iterator
Browse files Browse the repository at this point in the history
  • Loading branch information
faph committed Sep 16, 2023
1 parent ed018ca commit 72260ba
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 12 deletions.
8 changes: 4 additions & 4 deletions src/py_adapter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import inspect
import logging
import uuid
from collections.abc import Iterator
from typing import (
Any,
Callable,
Expand Down Expand Up @@ -142,10 +143,9 @@ def deserialize(data: bytes, py_type: Type[Obj], *, format: str, writer_schema:
return obj


# TODO: return generator instead
def deserialize_many(data: bytes, py_type: Type[Obj], *, format: str, writer_schema: bytes = b"") -> List[Obj]:
def deserialize_many(data: bytes, py_type: Type[Obj], *, format: str, writer_schema: bytes = b"") -> Iterator[Obj]:
"""
Deserialize bytes as a list of Python objects of a given type from a serialization format supported by
Deserialize bytes as an iterator over Python objects of a given type from a serialization format supported by
**py-adapter**
:param data: Serialized data
Expand All @@ -155,7 +155,7 @@ def deserialize_many(data: bytes, py_type: Type[Obj], *, format: str, writer_sch
"""
deserialize_fn = py_adapter.plugin.plugin_hook(format, "deserialize_many")
basic_objs = deserialize_fn(data=data, writer_schema=writer_schema)
objs = [from_basic_type(basic_obj, py_type) for basic_obj in basic_objs]
objs = (from_basic_type(basic_obj, py_type) for basic_obj in basic_objs)
return objs


Expand Down
7 changes: 4 additions & 3 deletions src/py_adapter/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
import functools
import logging
import sys
from typing import TYPE_CHECKING, List, Sequence
from collections.abc import Iterator
from typing import TYPE_CHECKING, Sequence

import pluggy

Expand Down Expand Up @@ -135,9 +136,9 @@ def deserialize(data: bytes, writer_schema: bytes) -> "py_adapter.Basic":


@_hookspec(firstresult=True)
def deserialize_many(data: bytes, writer_schema: bytes) -> List["py_adapter.Basic"]:
def deserialize_many(data: bytes, writer_schema: bytes) -> Iterator["py_adapter.Basic"]:
"""
Hook specification. Deserialize data as a list of objects of basic Python types
Hook specification. Deserialize data as an iterator over objects of basic Python types
:param data: Bytes to deserialize
:param writer_schema: Data schema used to serialize the data with, as JSON bytes.
Expand Down
11 changes: 6 additions & 5 deletions src/py_adapter/plugin/_avro.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"""

import io
from typing import List, Sequence
from collections.abc import Iterator
from typing import Sequence

import orjson

Expand Down Expand Up @@ -78,14 +79,14 @@ def deserialize(data: bytes, writer_schema: bytes) -> py_adapter.Basic:
data_stream = io.BytesIO(data)
# TODO: add support for reader schema, if provided
# TODO: add support for reader of data with embedded (writer) schema
basic_obj = fastavro.read.schemaless_reader(data_stream, writer_schema=writer_schema_obj)
basic_obj = fastavro.read.schemaless_reader(data_stream, writer_schema=writer_schema_obj, reader_schema=None)
return basic_obj


@py_adapter.plugin.hook
def deserialize_many(data: bytes, writer_schema: bytes) -> List[py_adapter.Basic]:
def deserialize_many(data: bytes, writer_schema: bytes) -> Iterator[py_adapter.Basic]:
"""
Deserialize Avro container file format data as a list of objects of basic Python types
Deserialize Avro container file format data as an iterator over objects of basic Python types
:param data: Bytes to deserialize
:param writer_schema: Data schema used to serialize the data with, as JSON bytes.
Expand All @@ -95,5 +96,5 @@ def deserialize_many(data: bytes, writer_schema: bytes) -> List[py_adapter.Basic
# TODO: make it fail if writer_schema is provided?
data_stream = io.BytesIO(data)
# TODO: add support for reader schema, if provided
basic_objs = list(fastavro.read.reader(data_stream, reader_schema=None))
basic_objs = fastavro.read.reader(data_stream, reader_schema=None)
return basic_objs

0 comments on commit 72260ba

Please sign in to comment.