Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add DataFrame and Series to_polars #1803

Merged
merged 2 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
- to_native
- to_numpy
- to_pandas
- to_polars
- unique
- unpivot
- with_columns
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
- to_list
- to_numpy
- to_pandas
- to_polars
- to_native
- unique
- value_counts
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -427,6 +428,11 @@ def sort(
def to_pandas(self: Self) -> pd.DataFrame:
    """Convert the wrapped native frame to a pandas DataFrame.

    Returns:
        Whatever `to_pandas()` returns when called on `self._native_frame`.
    """
    native_frame = self._native_frame
    return native_frame.to_pandas()

def to_polars(self: Self) -> pl.DataFrame:
    """Convert the wrapped native frame to a Polars DataFrame.

    Returns:
        The result of passing `self._native_frame` to `pl.from_arrow`.
    """
    # polars is imported at call time, inside the method body.
    import polars as pl  # ignore-banned-import

    native_frame = self._native_frame
    return pl.from_arrow(native_frame)  # type: ignore[return-value]

def to_numpy(self: Self) -> np.ndarray:
import numpy as np # ignore-banned-import

Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -733,6 +734,11 @@ def to_pandas(self: Self) -> pd.Series:

return pd.Series(self._native_series, name=self.name)

def to_polars(self: Self) -> pl.Series:
    """Convert this series to a Polars Series, keeping its name.

    `pl.from_arrow` is handed only the native array, so the name tracked on
    this wrapper (`self.name`) is applied to the result explicitly. This
    mirrors `to_pandas`, which passes `name=self.name` when building the
    pandas Series.

    Returns:
        A Polars Series built from `self._native_series` and named
        `self.name`.
    """
    import polars as pl  # ignore-banned-import

    converted = pl.from_arrow(self._native_series)
    # `from_arrow` is annotated as returning either a DataFrame or a Series;
    # for a single native array the result is a Series, which has `alias`.
    return converted.alias(self.name)  # type: ignore[union-attr]

def is_duplicated(self: Self) -> ArrowSeries:
    """Delegate to the frame-level `is_duplicated` and restore this series' name.

    Returns:
        The result of `is_duplicated()` on the frame returned by
        `self.to_frame()`, aliased to `self.name`.
    """
    as_frame = self.to_frame()
    duplicated = as_frame.is_duplicated()
    return duplicated.alias(self.name)

Expand Down
22 changes: 19 additions & 3 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

import numpy as np
import pandas as pd
import polars as pl
from typing_extensions import Self

from narwhals._pandas_like.group_by import PandasLikeGroupBy
Expand Down Expand Up @@ -763,12 +764,27 @@ def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any:
)
return df.to_numpy(copy=copy)

def to_pandas(self) -> Any:
def to_pandas(self: Self) -> pd.DataFrame:
if self._implementation is Implementation.PANDAS:
return self._native_frame
if self._implementation is Implementation.MODIN:
elif self._implementation is Implementation.CUDF: # pragma: no cover
return self._native_frame.to_pandas()
elif self._implementation is Implementation.MODIN:
return self._native_frame._to_pandas()
return self._native_frame.to_pandas() # pragma: no cover
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)

def to_polars(self: Self) -> pl.DataFrame:
    """Convert this pandas-like frame to a Polars DataFrame.

    The native frame is first brought to pandas (as-is for pandas, via
    `to_pandas()` for cuDF, via `_to_pandas()` for Modin) and then handed to
    `pl.from_pandas`.

    Returns:
        The Polars DataFrame produced by `pl.from_pandas`.

    Raises:
        AssertionError: If `self._implementation` is none of pandas, cuDF or
            Modin.
    """
    import polars as pl  # ignore-banned-import

    implementation = self._implementation

    if implementation is Implementation.PANDAS:
        pandas_frame = self._native_frame
    elif implementation is Implementation.CUDF:  # pragma: no cover
        pandas_frame = self._native_frame.to_pandas()
    elif implementation is Implementation.MODIN:
        pandas_frame = self._native_frame._to_pandas()
    else:  # pragma: no cover
        msg = f"Unknown implementation: {implementation}"
        raise AssertionError(msg)

    return pl.from_pandas(pandas_frame)

def write_parquet(self, file: Any) -> Any:
self._native_frame.to_parquet(file)
Expand Down
22 changes: 18 additions & 4 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
if TYPE_CHECKING:
from types import ModuleType

import pandas as pd
import polars as pl
from typing_extensions import Self

from narwhals._pandas_like.dataframe import PandasLikeDataFrame
Expand Down Expand Up @@ -303,13 +305,13 @@ def arg_true(self) -> PandasLikeSeries:
def arg_min(self) -> int:
ser = self._native_series
if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
return ser.values.argmin() # type: ignore[no-any-return]
return ser.to_numpy().argmin() # type: ignore[no-any-return]
return ser.argmin() # type: ignore[no-any-return]

def arg_max(self) -> int:
ser = self._native_series
if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
return ser.values.argmax() # type: ignore[no-any-return]
return ser.to_numpy().argmax() # type: ignore[no-any-return]
return ser.argmax() # type: ignore[no-any-return]

# Binary comparisons
Expand Down Expand Up @@ -837,16 +839,28 @@ def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any:
)
return s.to_numpy(dtype=dtype, copy=copy)

def to_pandas(self) -> Any:
def to_pandas(self: Self) -> pd.Series:
if self._implementation is Implementation.PANDAS:
return self._native_series
elif self._implementation is Implementation.CUDF:
elif self._implementation is Implementation.CUDF: # pragma: no cover
return self._native_series.to_pandas()
elif self._implementation is Implementation.MODIN:
return self._native_series._to_pandas()
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)

def to_polars(self: Self) -> pl.Series:
    """Convert this pandas-like series to a Polars Series.

    Returns:
        The result of `pl.from_pandas` applied to the native series (pandas),
        or to its pandas conversion (cuDF via `to_pandas()`, Modin via
        `_to_pandas()`).

    Raises:
        AssertionError: If `self._implementation` is none of pandas, cuDF or
            Modin.
    """
    import polars as pl  # ignore-banned-import

    if self._implementation is Implementation.PANDAS:
        return pl.from_pandas(self._native_series)
    elif self._implementation is Implementation.CUDF:  # pragma: no cover
        return pl.from_pandas(self._native_series.to_pandas())
    elif self._implementation is Implementation.MODIN:
        return pl.from_pandas(self._native_series._to_pandas())
    msg = f"Unknown implementation: {self._implementation}"  # pragma: no cover
    raise AssertionError(msg)

# --- descriptive ---
def is_duplicated(self: Self) -> Self:
res = self._native_series.duplicated(keep=False)
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,9 @@ def pivot(
)
return self._from_native_object(result)

def to_polars(self: Self) -> pl.DataFrame:
    """Return the wrapped native frame.

    Returns:
        The object stored in `self._native_frame`, returned as-is.
    """
    native_frame = self._native_frame
    return native_frame


class PolarsLazyFrame:
def __init__(
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,9 @@ def __contains__(self: Self, other: Any) -> bool:
msg = f"Unable to compare other of type {type(other)} with series of type {self.dtype}."
raise InvalidOperationError(msg) from exc

def to_polars(self: Self) -> pl.Series:
    """Return the wrapped native series.

    Returns:
        The object stored in `self._native_series`, returned as-is.
    """
    native_series = self._native_series
    return native_series

@property
def dt(self: Self) -> PolarsSeriesDateTimeNamespace:
return PolarsSeriesDateTimeNamespace(self)
Expand Down
67 changes: 67 additions & 0 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -581,6 +582,72 @@ def to_pandas(self) -> pd.DataFrame:
"""
return self._compliant_frame.to_pandas()

def to_polars(self) -> pl.DataFrame:
    """Convert this DataFrame to a Polars DataFrame.

    Returns:
        A Polars DataFrame.

    Examples:
        Construct pandas, Polars (eager) and PyArrow DataFrames:

        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> from narwhals.typing import IntoDataFrame
        >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)
        >>> df_pa = pa.table(data)

        We define a library agnostic function:

        >>> def agnostic_to_polars(df_native: IntoDataFrame) -> pl.DataFrame:
        ...     df = nw.from_native(df_native)
        ...     return df.to_polars()

        We can then pass any supported library such as pandas, Polars (eager), or
        PyArrow to `agnostic_to_polars`:

        >>> agnostic_to_polars(df_pd)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘

        >>> agnostic_to_polars(df_pl)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘

        >>> agnostic_to_polars(df_pa)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘
    """
    # The conversion itself is implemented by the backend-specific compliant frame.
    return self._compliant_frame.to_polars()  # type: ignore[no-any-return]

@overload
def write_csv(self, file: None = None) -> str: ...

Expand Down
61 changes: 59 additions & 2 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -2764,8 +2765,8 @@ def to_numpy(self) -> np.ndarray:
"""
return self._compliant_series.to_numpy()

def to_pandas(self) -> pd.Series:
"""Convert to pandas.
def to_pandas(self: Self) -> pd.Series:
"""Convert to pandas Series.

Returns:
A pandas Series containing the data from this Series.
Expand Down Expand Up @@ -2811,6 +2812,62 @@ def to_pandas(self) -> pd.Series:
"""
return self._compliant_series.to_pandas()

def to_polars(self: Self) -> pl.Series:
    """Convert to a Polars Series.

    Returns:
        A Polars Series containing the data from this Series.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> from narwhals.typing import IntoSeries

        >>> data = [1, 2, 3]
        >>> s_pd = pd.Series(data, name="a")
        >>> s_pl = pl.Series("a", data)
        >>> s_pa = pa.chunked_array([data])

        We define a library agnostic function:

        >>> def agnostic_to_polars(s_native: IntoSeries) -> pl.Series:
        ...     s = nw.from_native(s_native, series_only=True)
        ...     return s.to_polars()

        We can then pass any supported library such as pandas, Polars, or
        PyArrow to `agnostic_to_polars`:

        >>> agnostic_to_polars(s_pd)  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: 'a' [i64]
        [
           1
           2
           3
        ]

        >>> agnostic_to_polars(s_pl)  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: 'a' [i64]
        [
           1
           2
           3
        ]

        >>> agnostic_to_polars(s_pa)  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: '' [i64]
        [
           1
           2
           3
        ]
    """
    # The conversion itself is implemented by the backend-specific compliant series.
    return self._compliant_series.to_polars()  # type: ignore[no-any-return]

def __add__(self, other: object) -> Self:
return self._from_compliant_series(
self._compliant_series.__add__(self._extract_native(other))
Expand Down
23 changes: 23 additions & 0 deletions tests/frame/to_polars_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import polars as pl
import pytest
from polars.testing import assert_frame_equal

import narwhals.stable.v1 as nw

if TYPE_CHECKING:
from tests.utils import ConstructorEager


@pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning")
def test_convert_polars(constructor_eager: ConstructorEager) -> None:
    """`to_polars` on any eager backend equals a Polars frame built from the same data."""
    columns = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]}
    native_frame = constructor_eager(columns)
    nw_frame = nw.from_native(native_frame)
    converted = nw_frame.to_polars()  # type: ignore[union-attr]

    assert_frame_equal(converted, pl.DataFrame(columns))
24 changes: 24 additions & 0 deletions tests/series_only/to_polars_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import polars as pl
from polars.testing import assert_series_equal

import narwhals.stable.v1 as nw

if TYPE_CHECKING:
from tests.utils import ConstructorEager

data = [1, 3, 2]


def test_series_to_polars(constructor_eager: ConstructorEager) -> None:
    """`Series.to_polars` on any eager backend equals `pl.Series("a", data)`."""
    nw_frame = nw.from_native(constructor_eager({"a": data}), eager_only=True)
    converted = nw_frame["a"].alias("a").to_polars()

    assert_series_equal(converted, pl.Series("a", data))
1 change: 1 addition & 0 deletions utils/check_api_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"to_native",
"to_numpy",
"to_pandas",
"to_polars",
"value_counts",
"zip_with",
"__iter__",
Expand Down
Loading