From 098736baf12d0bd6dd956813c697f09442638e5b Mon Sep 17 00:00:00 2001 From: faph Date: Tue, 12 Sep 2023 12:58:22 +0100 Subject: [PATCH 1/4] Move JSON logical tests out of dataclasses test script --- tests/test_dataclass.py | 51 ----------------------------------------- tests/test_logicals.py | 20 ++++++++++++++++ 2 files changed, 20 insertions(+), 51 deletions(-) diff --git a/tests/test_dataclass.py b/tests/test_dataclass.py index 18a504d..cf5ece0 100644 --- a/tests/test_dataclass.py +++ b/tests/test_dataclass.py @@ -603,57 +603,6 @@ class PyType: assert_schema(PyType, expected, namespace="my_package.my_module") -def test_dict_json_logical_string_field(): - @dataclasses.dataclass - class PyType: - field_a: Dict[str, Any] = dataclasses.field( - metadata={"avro_adapter": {"logical_type": "json"}}, - default_factory=dict, - ) - - expected = { - "type": "record", - "name": "PyType", - "fields": [ - { - "name": "field_a", - "type": { - "type": "string", - "logicalType": "json", - }, - "default": "{}", - } - ], - } - options = pas.Option.LOGICAL_JSON_STRING - assert_schema(PyType, expected, options=options) - - -def test_dict_json_logical_bytes_field(): - @dataclasses.dataclass - class PyType: - field_a: Dict[str, Any] = dataclasses.field( - metadata={"avro_adapter": {"logical_type": "json"}}, - default_factory=dict, - ) - - expected = { - "type": "record", - "name": "PyType", - "fields": [ - { - "name": "field_a", - "type": { - "type": "bytes", - "logicalType": "json", - }, - "default": "{}", - } - ], - } - assert_schema(PyType, expected) - - def test_decimal_field_default(): @dataclasses.dataclass class PyType: diff --git a/tests/test_logicals.py b/tests/test_logicals.py index 165dd90..b1baca9 100644 --- a/tests/test_logicals.py +++ b/tests/test_logicals.py @@ -11,6 +11,7 @@ import datetime import uuid +from typing import Any, Dict import py_avro_schema as pas from py_avro_schema._testing import assert_schema @@ -112,3 +113,22 @@ def test_uuid(): "logicalType": "uuid", } assert_schema(py_type, expected) + + +def test_dict_json_logical_string_field(): + py_type = Dict[str, Any] + expected = { + "type": "string", + "logicalType": "json", + } + options = pas.Option.LOGICAL_JSON_STRING + assert_schema(py_type, expected, options=options) + + +def test_dict_json_logical_bytes_field(): + py_type = Dict[str, Any] + expected = { + "type": "bytes", + "logicalType": "json", + } + assert_schema(py_type, expected) From 4d83673a08400d29b676bd2cdd6de5bb66dd26ff Mon Sep 17 00:00:00 2001 From: faph Date: Tue, 12 Sep 2023 13:28:33 +0100 Subject: [PATCH 2/4] Handle List[Dict[str, Any]] as JSON --- src/py_avro_schema/_schemas.py | 21 ++++++++++++++++++--- tests/test_logicals.py | 21 ++++++++++++++++++++- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py index 439f09c..189d24a 100644 --- a/src/py_avro_schema/_schemas.py +++ b/src/py_avro_schema/_schemas.py @@ -278,13 +278,12 @@ def data(self, names: NamesType) -> JSONObj: class DictAsJSONSchema(Schema): - """An Avro string schema representing a Python Dict[str, Any] assuming JSON serialization""" + """An Avro string schema representing a Python Dict[str, Any] or List[Dict[str, Any]] assuming JSON serialization""" @classmethod def handles_type(cls, py_type: Type) -> bool: """Whether this schema class can represent a given Python class""" - origin = getattr(py_type, "__origin__", None) - return inspect.isclass(origin) and issubclass(origin, dict) and py_type.__args__ == (str, Any) + return _is_dict_str_any(py_type) or _is_list_dict_str_any(py_type) def data(self, names: NamesType) -> JSONObj: """Return the schema data""" @@ -901,3 +900,19 @@ def _doc_for_class(py_type: Type) -> str: return doc else: return "" + + +def _is_dict_str_any(py_type: Type) -> bool: + """Return whether a given type is ``Dict[str, Any]``""" + origin = getattr(py_type, "__origin__", None) + return inspect.isclass(origin) and issubclass(origin, dict) and py_type.__args__ == (str, Any) + + +def _is_list_dict_str_any(py_type: Type) -> bool: + """Return whether a given type is ``List[Dict[str, Any]]``""" + origin = getattr(py_type, "__origin__", None) + args = getattr(py_type, "__args__", None) + if args: + return inspect.isclass(origin) and issubclass(origin, list) and _is_dict_str_any(args[0]) + else: + return False diff --git a/tests/test_logicals.py b/tests/test_logicals.py index b1baca9..05c6dca 100644 --- a/tests/test_logicals.py +++ b/tests/test_logicals.py @@ -11,7 +11,7 @@ import datetime import uuid -from typing import Any, Dict +from typing import Any, Dict, List import py_avro_schema as pas from py_avro_schema._testing import assert_schema @@ -132,3 +132,22 @@ def test_dict_json_logical_bytes_field(): "logicalType": "json", } assert_schema(py_type, expected) + + +def test_list_json_logical_string_field(): + py_type = List[Dict[str, Any]] + expected = { + "type": "string", + "logicalType": "json", + } + options = pas.Option.LOGICAL_JSON_STRING + assert_schema(py_type, expected, options=options) + + +def test_list_json_logical_bytes_field(): + py_type = List[Dict[str, Any]] + expected = { + "type": "bytes", + "logicalType": "json", + } + assert_schema(py_type, expected) From 79248ccc914614febea8320bdc8fc29f1c1edcf5 Mon Sep 17 00:00:00 2001 From: faph Date: Tue, 12 Sep 2023 13:32:35 +0100 Subject: [PATCH 3/4] Docs for List[Dict[str, Any]] --- docs/types.rst | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/types.rst b/docs/types.rst index 8a9836b..25e7d67 100644 --- a/docs/types.rst +++ b/docs/types.rst @@ -214,6 +214,23 @@ Arbitrary Python dictionaries could be serialized as a ``bytes`` Avro schema by To support JSON serialization as *strings* instead of *bytes*, use :attr:`py_avro_schema.Option.LOGICAL_JSON_STRING`. +:class:`typing.List[typing.Dict[str, typing.Any]]` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. seealso:: + + For a "normal" Avro ``array`` schema using fully typed Python lists of dictionaries, see :ref:`types::class:`typing.sequence``. + + +| Avro schema: ``bytes`` +| Avro logical type: ``json`` + +Arbitrary lists of Python dictionaries could be serialized as a ``bytes`` Avro schema by first serializing the data as JSON. +**py-avro-schema** supports this "JSON-in-Avro" approach by adding the **custom** logical type ``json`` to a ``bytes`` schema. + +To support JSON serialization as *strings* instead of *bytes*, use :attr:`py_avro_schema.Option.LOGICAL_JSON_STRING`. + + :class:`typing.Mapping` ~~~~~~~~~~~~~~~~~~~~~~~ @@ -221,7 +238,8 @@ Avro schema: ``map`` This supports other "generic type" versions of :class:`collections.abc.Mapping`, including :class:`typing.Dict`. -Avro ``map`` schemas support **string** keys only. Map values can be any other Python type supported by **py-avro-schema**. For example, ``Dict[str, int]`` is output as: +Avro ``map`` schemas support **string** keys only. Map values can be any other Python type supported by **py-avro-schema**. +For example, ``Dict[str, int]`` is output as: .. code-block:: json From 52cfa74e48d18517ca4ffce37b8a02ca2f0f2701 Mon Sep 17 00:00:00 2001 From: faph Date: Tue, 12 Sep 2023 13:35:01 +0100 Subject: [PATCH 4/4] Remove unused import --- tests/test_dataclass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_dataclass.py b/tests/test_dataclass.py index cf5ece0..b3d0a84 100644 --- a/tests/test_dataclass.py +++ b/tests/test_dataclass.py @@ -14,7 +14,7 @@ import decimal import enum import re -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional import pytest