Skip to content

Commit

Permalink
test(python): Add tests for data mismatch on read_json (#19425)
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Oct 24, 2024
1 parent 29c34c4 commit b9084b7
Showing 1 changed file with 55 additions and 0 deletions.
55 changes: 55 additions & 0 deletions py-polars/tests/unit/io/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pytest

import polars as pl
from polars.exceptions import ComputeError
from polars.testing import assert_frame_equal


Expand Down Expand Up @@ -454,3 +455,57 @@ def test_zfs_json_roundtrip(size: int) -> None:

f.seek(0)
assert_frame_equal(a, pl.read_json(f))


def test_read_json_raise_on_data_type_mismatch() -> None:
    """Reading must fail when a later row disagrees with the inferred schema.

    Schema inference is limited to the first row (``infer_schema_length=1``),
    in which ``a`` is null; the integer value in the second row is then
    expected to surface as a ``ComputeError``.
    """
    payload = b"""\
[
{"a": null},
{"a": 1}
]
"""
    with pytest.raises(ComputeError):
        pl.read_json(payload, infer_schema_length=1)


def test_read_json_struct_schema() -> None:
    """Check how ``read_json`` treats a key that first appears in a later row.

    The same two-row document is read three times: with inference over one
    row (expected to raise), with inference over both rows (expected to yield
    a nullable ``b`` column), and with an explicit schema that omits ``b``.
    """
    # Key "b" is present in the second object only.
    payload = b"""\
[
{"a": 1},
{"a": 2, "b": 2}
]
"""

    # Only the first row is inspected, so "b" is not part of the schema.
    with pytest.raises(ComputeError, match="extra key in struct data: b"):
        pl.read_json(payload, infer_schema_length=1)

    # Inspecting both rows picks up "b"; the first row gets a null for it.
    inferred = pl.read_json(payload, infer_schema_length=2)
    expected_inferred = pl.DataFrame({"a": [1, 2], "b": [None, 2]})
    assert_frame_equal(inferred, expected_inferred)

    # If the schema was explicitly given, then we ignore extra fields.
    # TODO: There should be a `columns=` parameter to this.
    explicit = pl.read_json(payload, schema={"a": pl.Int64})
    expected_explicit = pl.DataFrame({"a": [1, 2]})
    assert_frame_equal(explicit, expected_explicit)

0 comments on commit b9084b7

Please sign in to comment.