Skip to content

Commit

Permalink
feat: add DataFrame and Series to_polars (#1803)
Browse files Browse the repository at this point in the history
* feat: add DataFrame and Series to_polars

* modin and cudf via pandas
  • Loading branch information
FBruzzesi authored Jan 13, 2025
1 parent f769897 commit ffce1b6
Show file tree
Hide file tree
Showing 13 changed files with 231 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
- to_native
- to_numpy
- to_pandas
- to_polars
- unique
- unpivot
- with_columns
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
- to_list
- to_numpy
- to_pandas
- to_polars
- to_native
- unique
- value_counts
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -427,6 +428,11 @@ def sort(
def to_pandas(self: Self) -> pd.DataFrame:
    """Return the result of calling `to_pandas()` on the wrapped native frame."""
    native_frame = self._native_frame
    return native_frame.to_pandas()

def to_polars(self: Self) -> pl.DataFrame:
    """Build a Polars DataFrame from the wrapped native frame via `pl.from_arrow`."""
    # Imported lazily so polars is only required when this method is called.
    import polars as pl  # ignore-banned-import

    converted = pl.from_arrow(self._native_frame)
    return converted  # type: ignore[return-value]

def to_numpy(self: Self) -> np.ndarray:
import numpy as np # ignore-banned-import

Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -733,6 +734,11 @@ def to_pandas(self: Self) -> pd.Series:

return pd.Series(self._native_series, name=self.name)

def to_polars(self: Self) -> pl.Series:
    """Convert this series to a Polars Series, keeping its name.

    Returns:
        A Polars Series built from the wrapped native series and named
        after ``self.name``.
    """
    # Imported lazily so polars is only required when this method is called.
    import polars as pl  # ignore-banned-import

    series = pl.from_arrow(self._native_series)
    # The name lives on this wrapper rather than on the native data, so
    # re-apply it explicitly, mirroring `to_pandas` which passes
    # `name=self.name`.
    return series.alias(self.name)  # type: ignore[union-attr]

def is_duplicated(self: Self) -> ArrowSeries:
    """Delegate to the frame returned by `to_frame()` and alias the result to this series' name."""
    as_frame = self.to_frame()
    duplicated = as_frame.is_duplicated()
    return duplicated.alias(self.name)

Expand Down
22 changes: 19 additions & 3 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

import numpy as np
import pandas as pd
import polars as pl
from typing_extensions import Self

from narwhals._pandas_like.group_by import PandasLikeGroupBy
Expand Down Expand Up @@ -763,12 +764,27 @@ def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any:
)
return df.to_numpy(copy=copy)

def to_pandas(self) -> Any:
def to_pandas(self: Self) -> pd.DataFrame:
if self._implementation is Implementation.PANDAS:
return self._native_frame
if self._implementation is Implementation.MODIN:
elif self._implementation is Implementation.CUDF: # pragma: no cover
return self._native_frame.to_pandas()
elif self._implementation is Implementation.MODIN:
return self._native_frame._to_pandas()
return self._native_frame.to_pandas() # pragma: no cover
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)

def to_polars(self: Self) -> pl.DataFrame:
    """Convert this frame to a Polars DataFrame.

    pandas data is handed to Polars directly; cuDF and Modin data are first
    converted to pandas.

    Returns:
        A Polars DataFrame built from the native frame.

    Raises:
        AssertionError: If the implementation is not pandas, cuDF or Modin.
    """
    # Imported lazily so polars is only required when this method is called.
    import polars as pl  # ignore-banned-import

    implementation = self._implementation
    if implementation is Implementation.PANDAS:
        pandas_frame = self._native_frame
    elif implementation is Implementation.CUDF:  # pragma: no cover
        pandas_frame = self._native_frame.to_pandas()
    elif implementation is Implementation.MODIN:
        pandas_frame = self._native_frame._to_pandas()
    else:  # pragma: no cover
        msg = f"Unknown implementation: {self._implementation}"
        raise AssertionError(msg)
    return pl.from_pandas(pandas_frame)

def write_parquet(self, file: Any) -> Any:
self._native_frame.to_parquet(file)
Expand Down
22 changes: 18 additions & 4 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
if TYPE_CHECKING:
from types import ModuleType

import pandas as pd
import polars as pl
from typing_extensions import Self

from narwhals._pandas_like.dataframe import PandasLikeDataFrame
Expand Down Expand Up @@ -303,13 +305,13 @@ def arg_true(self) -> PandasLikeSeries:
def arg_min(self) -> int:
ser = self._native_series
if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
return ser.values.argmin() # type: ignore[no-any-return]
return ser.to_numpy().argmin() # type: ignore[no-any-return]
return ser.argmin() # type: ignore[no-any-return]

def arg_max(self) -> int:
ser = self._native_series
if self._implementation is Implementation.PANDAS and self._backend_version < (1,):
return ser.values.argmax() # type: ignore[no-any-return]
return ser.to_numpy().argmax() # type: ignore[no-any-return]
return ser.argmax() # type: ignore[no-any-return]

# Binary comparisons
Expand Down Expand Up @@ -837,16 +839,28 @@ def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any:
)
return s.to_numpy(dtype=dtype, copy=copy)

def to_pandas(self) -> Any:
def to_pandas(self: Self) -> pd.Series:
if self._implementation is Implementation.PANDAS:
return self._native_series
elif self._implementation is Implementation.CUDF:
elif self._implementation is Implementation.CUDF: # pragma: no cover
return self._native_series.to_pandas()
elif self._implementation is Implementation.MODIN:
return self._native_series._to_pandas()
msg = f"Unknown implementation: {self._implementation}" # pragma: no cover
raise AssertionError(msg)

def to_polars(self: Self) -> pl.Series:
    """Convert this series to a Polars Series.

    pandas data is handed to Polars directly; cuDF and Modin data are first
    converted to pandas.

    Returns:
        A Polars Series built from the native series.

    Raises:
        AssertionError: If the implementation is not pandas, cuDF or Modin.
    """
    # Imported lazily so polars is only required when this method is called.
    import polars as pl  # ignore-banned-import

    if self._implementation is Implementation.PANDAS:
        return pl.from_pandas(self._native_series)
    elif self._implementation is Implementation.CUDF:  # pragma: no cover
        return pl.from_pandas(self._native_series.to_pandas())
    elif self._implementation is Implementation.MODIN:
        return pl.from_pandas(self._native_series._to_pandas())
    msg = f"Unknown implementation: {self._implementation}"  # pragma: no cover
    raise AssertionError(msg)

# --- descriptive ---
def is_duplicated(self: Self) -> Self:
res = self._native_series.duplicated(keep=False)
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,9 @@ def pivot(
)
return self._from_native_object(result)

def to_polars(self: Self) -> pl.DataFrame:
    """Return the wrapped native frame itself; no conversion is performed."""
    native_frame = self._native_frame
    return native_frame


class PolarsLazyFrame:
def __init__(
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,9 @@ def __contains__(self: Self, other: Any) -> bool:
msg = f"Unable to compare other of type {type(other)} with series of type {self.dtype}."
raise InvalidOperationError(msg) from exc

def to_polars(self: Self) -> pl.Series:
    """Return the wrapped native series itself; no conversion is performed."""
    native_series = self._native_series
    return native_series

@property
def dt(self: Self) -> PolarsSeriesDateTimeNamespace:
    """Return a `PolarsSeriesDateTimeNamespace` constructed from this series."""
    namespace = PolarsSeriesDateTimeNamespace(self)
    return namespace
Expand Down
67 changes: 67 additions & 0 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -581,6 +582,72 @@ def to_pandas(self) -> pd.DataFrame:
"""
return self._compliant_frame.to_pandas()

def to_polars(self) -> pl.DataFrame:
    """Convert this DataFrame to a polars DataFrame.

    Returns:
        A polars DataFrame.

    Examples:
        Construct pandas, Polars (eager) and PyArrow DataFrames:

        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> from narwhals.typing import IntoDataFrame
        >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)
        >>> df_pa = pa.table(data)

        We define a library agnostic function:

        >>> def agnostic_to_polars(df_native: IntoDataFrame) -> pl.DataFrame:
        ...     df = nw.from_native(df_native)
        ...     return df.to_polars()

        We can then pass any supported library such as pandas, Polars (eager), or
        PyArrow to `agnostic_to_polars`:

        >>> agnostic_to_polars(df_pd)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘
        >>> agnostic_to_polars(df_pl)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘
        >>> agnostic_to_polars(df_pa)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ f64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 6.0 ┆ a   │
        │ 2   ┆ 7.0 ┆ b   │
        │ 3   ┆ 8.0 ┆ c   │
        └─────┴─────┴─────┘
    """
    # The conversion itself is implemented by the backend-specific frame.
    return self._compliant_frame.to_polars()  # type: ignore[no-any-return]

@overload
def write_csv(self, file: None = None) -> str: ...

Expand Down
61 changes: 59 additions & 2 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pyarrow as pa
from typing_extensions import Self

Expand Down Expand Up @@ -2764,8 +2765,8 @@ def to_numpy(self) -> np.ndarray:
"""
return self._compliant_series.to_numpy()

def to_pandas(self) -> pd.Series:
"""Convert to pandas.
def to_pandas(self: Self) -> pd.Series:
"""Convert to pandas Series.
Returns:
A pandas Series containing the data from this Series.
Expand Down Expand Up @@ -2811,6 +2812,62 @@ def to_pandas(self) -> pd.Series:
"""
return self._compliant_series.to_pandas()

def to_polars(self: Self) -> pl.Series:
    """Convert to polars Series.

    Returns:
        A polars Series containing the data from this Series.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> from narwhals.typing import IntoSeries
        >>> data = [1, 2, 3]
        >>> s_pd = pd.Series(data, name="a")
        >>> s_pl = pl.Series("a", data)
        >>> s_pa = pa.chunked_array([data])

        We define a library agnostic function:

        >>> def agnostic_to_polars(s_native: IntoSeries) -> pl.Series:
        ...     s = nw.from_native(s_native, series_only=True)
        ...     return s.to_polars()

        We can then pass any supported library such as pandas, Polars, or
        PyArrow to `agnostic_to_polars`:

        >>> agnostic_to_polars(s_pd)  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: 'a' [i64]
        [
           1
           2
           3
        ]
        >>> agnostic_to_polars(s_pl)  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: 'a' [i64]
        [
           1
           2
           3
        ]
        >>> agnostic_to_polars(s_pa)  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: '' [i64]
        [
           1
           2
           3
        ]
    """
    # The conversion itself is implemented by the backend-specific series.
    return self._compliant_series.to_polars()  # type: ignore[no-any-return]

def __add__(self, other: object) -> Self:
return self._from_compliant_series(
self._compliant_series.__add__(self._extract_native(other))
Expand Down
23 changes: 23 additions & 0 deletions tests/frame/to_polars_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import polars as pl
import pytest
from polars.testing import assert_frame_equal

import narwhals.stable.v1 as nw

if TYPE_CHECKING:
from tests.utils import ConstructorEager


@pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning")
def test_convert_polars(constructor_eager: ConstructorEager) -> None:
    """`to_polars` on a frame from any eager constructor equals the Polars frame built from the same data."""
    data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]}
    native_frame = constructor_eager(data)
    nw_frame = nw.from_native(native_frame)

    result = nw_frame.to_polars()  # type: ignore[union-attr]

    assert_frame_equal(result, pl.DataFrame(data))
24 changes: 24 additions & 0 deletions tests/series_only/to_polars_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import polars as pl
from polars.testing import assert_series_equal

import narwhals.stable.v1 as nw

if TYPE_CHECKING:
from tests.utils import ConstructorEager

data = [1, 3, 2]


def test_series_to_polars(constructor_eager: ConstructorEager) -> None:
    """`to_polars` on column "a" from any eager constructor equals the Polars series built from the same data."""
    native_frame = constructor_eager({"a": data})
    nw_series = nw.from_native(native_frame, eager_only=True)["a"].alias("a")

    result = nw_series.to_polars()

    assert_series_equal(result, pl.Series("a", data))
1 change: 1 addition & 0 deletions utils/check_api_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"to_native",
"to_numpy",
"to_pandas",
"to_polars",
"value_counts",
"zip_with",
"__iter__",
Expand Down

0 comments on commit ffce1b6

Please sign in to comment.