Skip to content

Commit

Permalink
GHXXX Add overloads of engine for pd.read_json
Browse files Browse the repository at this point in the history
  • Loading branch information
loicdiridollou committed Nov 16, 2024
1 parent 0ab562c commit 7603dde
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 1 deletion.
81 changes: 80 additions & 1 deletion pandas-stubs/io/json/_json.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,42 @@ def read_json(
Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
| None
) = ...,
lines: bool,
chunksize: int,
compression: CompressionOptions = ...,
nrows: int | None = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
engine: Literal["ujson"] = ...,
) -> JsonReader[Series]: ...
# pyarrow engine, chunked read of a Series: yields a JsonReader.
# Only paths and binary buffers are accepted by this overload, and both
# `lines=True` and `engine="pyarrow"` have to be passed explicitly.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[bytes],
    *,
    orient: JsonSeriesOrient | None = ...,
    typ: Literal["series"],
    dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: TimeUnit | None = ...,
    encoding: str | None = ...,
    encoding_errors: (
        Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
        | None
    ) = ...,
    lines: Literal[True],
    chunksize: int,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | NoDefault = ...,
    # No default here: a call that omits `engine` runs on "ujson" and must
    # resolve to the ujson overload, not this one.
    engine: Literal["pyarrow"],
) -> JsonReader[Series]: ...
@overload
def read_json(
path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
path_or_buf: FilePath | ReadBuffer[bytes],
*,
orient: JsonFrameOrient | None = ...,
typ: Literal["frame"] = ...,
Expand All @@ -72,6 +98,7 @@ def read_json(
nrows: int | None = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
engine: Literal["pyarrow"] = ...,
) -> JsonReader[DataFrame]: ...
@overload
def read_json(
Expand All @@ -96,6 +123,32 @@ def read_json(
nrows: int | None = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
engine: Literal["ujson"] = ...,
) -> Series: ...
# pyarrow engine, non-chunked read of a Series.
# Only paths and binary buffers are accepted by this overload, and both
# `lines=True` and `engine="pyarrow"` have to be passed explicitly.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[bytes],
    *,
    orient: JsonSeriesOrient | None = ...,
    typ: Literal["series"],
    dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: TimeUnit | None = ...,
    encoding: str | None = ...,
    encoding_errors: (
        Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
        | None
    ) = ...,
    # No default: the runtime default for `lines` is False, which the
    # pyarrow engine does not support.
    lines: Literal[True],
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | NoDefault = ...,
    # No default: a call that omits `engine` runs on "ujson" and must
    # resolve to the ujson overload, not this one.
    engine: Literal["pyarrow"],
) -> Series: ...
@overload
def read_json(
Expand All @@ -120,6 +173,32 @@ def read_json(
nrows: int | None = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
engine: Literal["ujson"] = ...,
) -> DataFrame: ...
# pyarrow engine, non-chunked read of a DataFrame (the default `typ`).
# Only paths and binary buffers are accepted by this overload, and both
# `lines=True` and `engine="pyarrow"` have to be passed explicitly.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[bytes],
    *,
    orient: JsonFrameOrient | None = ...,
    typ: Literal["frame"] = ...,
    dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: TimeUnit | None = ...,
    encoding: str | None = ...,
    encoding_errors: (
        Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
        | None
    ) = ...,
    # No default: the runtime default for `lines` is False, which the
    # pyarrow engine does not support.
    lines: Literal[True],
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | NoDefault = ...,
    # No default: with every parameter defaulted this overload would fully
    # overlap the ujson DataFrame overload for calls that omit `engine`.
    engine: Literal["pyarrow"],
) -> DataFrame: ...

class JsonReader(abc.Iterator, Generic[NDFrameT]):
Expand Down
25 changes: 25 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1625,3 +1625,28 @@ def test_read_excel_index_col() -> None:
),
pd.DataFrame,
)


def test_read_json_engine() -> None:
    """Exercise the `engine` argument of `pd.read_json` (added in pandas 2.0).

    Both accepted values are covered: "ujson" on a text buffer and
    "pyarrow" on a binary buffer of line-delimited JSON.
    """
    # Column-oriented document read from a text buffer with ujson.
    columnar_json = """{"index": {"0": 0, "1": 1},
"a": {"0": 1, "1": null},
"b": {"0": 2.5, "1": 4.5},
"c": {"0": true, "1": false},
"d": {"0": "a", "1": "b"},
"e": {"0": 1577.2, "1": 1577.1}}"""
    ujson_frame = pd.read_json(io.StringIO(columnar_json), engine="ujson")
    check(assert_type(ujson_frame, pd.DataFrame), pd.DataFrame)

    # Line-delimited records read from a binary buffer with pyarrow.
    ndjson_bytes = b"""{"col 1":"a","col 2":"b"}
{"col 1":"c","col 2":"d"}"""
    pyarrow_frame = pd.read_json(
        io.BytesIO(ndjson_bytes), lines=True, engine="pyarrow"
    )
    check(assert_type(pyarrow_frame, pd.DataFrame), pd.DataFrame)

0 comments on commit 7603dde

Please sign in to comment.