Skip to content

Commit

Permalink
actually, keep unique and drop_nulls
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Jan 15, 2025
1 parent 0b75d85 commit 655616e
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 185 deletions.
238 changes: 57 additions & 181 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from narwhals.expr_str import ExprStringNamespace
from narwhals.utils import _validate_rolling_arguments
from narwhals.utils import flatten
from narwhals.utils import issue_deprecation_warning

if TYPE_CHECKING:
from typing_extensions import Self
Expand Down Expand Up @@ -2394,6 +2395,13 @@ def sample(
) -> Self:
"""Sample randomly from this expression.
!!! warning
`Expr.sample` is deprecated and will be removed in a future version.
Hint: instead of `df.select(nw.col('a').sample())`, use
`df.select(nw.col('a')).sample()` instead.
Note: this will remain available in `narwhals.stable.v1`.
See [stable api](../backcompat.md/) for more information.
Arguments:
n: Number of items to return. Cannot be used with fraction.
fraction: Fraction of items to return. Cannot be used with n.
Expand All @@ -2403,54 +2411,14 @@ def sample(
Returns:
A new expression.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_sample(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").sample(fraction=1.0, with_replacement=True)
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_sample`:
>>> agnostic_sample(df_pd) # doctest: +SKIP
a
2 3
0 1
2 3
>>> agnostic_sample(df_pl) # doctest: +SKIP
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ f64 │
╞═════╡
│ 2 │
│ 3 │
│ 3 │
└─────┘
>>> agnostic_sample(df_pa) # doctest: +SKIP
pyarrow.Table
a: int64
----
a: [[1,3,3]]
"""
msg = (
"`Expr.sample` is deprecated and will be removed in a future version.\n\n"
"Hint: instead of `df.select(nw.col('a').sample())`, use `df.select(nw.col('a')).sample()`.\n\n"
"Note: this will remain available in `narwhals.stable.v1`.\n"
"See [stable api](../backcompat.md/) for more information.\n"
)
issue_deprecation_warning(msg, _version="1.22.0")
return self.__class__(
lambda plx: self._to_compliant_expr(plx).sample(
n, fraction=fraction, with_replacement=with_replacement, seed=seed
Expand Down Expand Up @@ -2907,113 +2875,51 @@ def quantile(
def head(self, n: int = 10) -> Self:
    r"""Get the first `n` rows.

    !!! warning
        `Expr.head` is deprecated and will be removed in a future version.
        Hint: instead of `df.select(nw.col('a').head())`, use
        `df.select(nw.col('a')).head()`.
        Note: this will remain available in `narwhals.stable.v1`.
        See [stable api](../backcompat.md/) for more information.

    Arguments:
        n: Number of rows to return.

    Returns:
        A new expression.
    """
    # No usage examples are given on purpose: the method is deprecated, and
    # doctests calling it would exercise the very pattern users are asked
    # to migrate away from.
    msg = (
        "`Expr.head` is deprecated and will be removed in a future version.\n\n"
        "Hint: instead of `df.select(nw.col('a').head())`, use `df.select(nw.col('a')).head()`.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See [stable api](../backcompat.md/) for more information.\n"
    )
    # Warn when the expression is built, i.e. at the user's call site.
    issue_deprecation_warning(msg, _version="1.22.0")
    return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n))

def tail(self, n: int = 10) -> Self:
    r"""Get the last `n` rows.

    !!! warning
        `Expr.tail` is deprecated and will be removed in a future version.
        Hint: instead of `df.select(nw.col('a').tail())`, use
        `df.select(nw.col('a')).tail()`.
        Note: this will remain available in `narwhals.stable.v1`.
        See [stable api](../backcompat.md/) for more information.

    Arguments:
        n: Number of rows to return.

    Returns:
        A new expression.
    """
    # No usage examples are given on purpose: the method is deprecated, and
    # doctests calling it would exercise the very pattern users are asked
    # to migrate away from.
    msg = (
        "`Expr.tail` is deprecated and will be removed in a future version.\n\n"
        "Hint: instead of `df.select(nw.col('a').tail())`, use `df.select(nw.col('a')).tail()`.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See [stable api](../backcompat.md/) for more information.\n"
    )
    # Warn when the expression is built, i.e. at the user's call site.
    issue_deprecation_warning(msg, _version="1.22.0")
    return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n))

def round(self, decimals: int = 0) -> Self:
Expand Down Expand Up @@ -3141,57 +3047,27 @@ def len(self) -> Self:
def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    r"""Take every `n`-th value of the expression, starting at `offset`.

    !!! warning
        `Expr.gather_every` is deprecated and will be removed in a future version.
        Hint: instead of `df.select(nw.col('a').gather_every(n))`, use
        `df.select(nw.col('a')).gather_every(n)`.
        Note: this will remain available in `narwhals.stable.v1`.
        See [stable api](../backcompat.md/) for more information.

    Arguments:
        n: Gather every *n*-th row.
        offset: Starting index.

    Returns:
        A new expression.
    """
    # No usage examples are given on purpose: the method is deprecated, and
    # doctests calling it would exercise the very pattern users are asked
    # to migrate away from.
    msg = (
        "`Expr.gather_every` is deprecated and will be removed in a future version.\n\n"
        "Hint: instead of `df.select(nw.col('a').gather_every())`, use `df.select(nw.col('a')).gather_every()`.\n\n"
        "Note: this will remain available in `narwhals.stable.v1`.\n"
        "See [stable api](../backcompat.md/) for more information.\n"
    )
    # Warn when the expression is built, i.e. at the user's call site.
    issue_deprecation_warning(msg, _version="1.22.0")
    return self.__class__(
        lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset)
    )
Expand Down
36 changes: 36 additions & 0 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,42 @@ def rolling_std(
ddof=ddof,
)

def head(self, n: int = 10) -> Self:
    r"""Return an expression that keeps only the first `n` rows.

    Arguments:
        n: Number of rows to return.

    Returns:
        A new expression.
    """

    def _first_rows(plx):
        # Resolve this expression for the given namespace, then truncate it.
        compliant = self._to_compliant_expr(plx)
        return compliant.head(n)

    return self.__class__(_first_rows)

def tail(self, n: int = 10) -> Self:
    r"""Return an expression that keeps only the last `n` rows.

    Arguments:
        n: Number of rows to return.

    Returns:
        A new expression.
    """

    def _last_rows(plx):
        # Resolve this expression for the given namespace, then keep its end.
        compliant = self._to_compliant_expr(plx)
        return compliant.tail(n)

    return self.__class__(_last_rows)

def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    r"""Return an expression taking every `n`-th value, starting at `offset`.

    Arguments:
        n: Gather every *n*-th row.
        offset: Starting index.

    Returns:
        A new expression.
    """

    def _every_nth(plx):
        # Resolve this expression for the given namespace, then subsample it.
        compliant = self._to_compliant_expr(plx)
        return compliant.gather_every(n=n, offset=offset)

    return self.__class__(_every_nth)


class Schema(NwSchema):
"""Ordered mapping of column names to their data type.
Expand Down
4 changes: 4 additions & 0 deletions tests/expr_and_series/gather_every_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest

import narwhals as nw_main
import narwhals.stable.v1 as nw
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data
Expand All @@ -21,6 +22,9 @@ def test_gather_every_expr(

assert_equal_data(result, expected)

with pytest.deprecated_call():
df.select(nw_main.col("a").gather_every(n=n, offset=offset))


@pytest.mark.parametrize("n", [1, 2, 3])
@pytest.mark.parametrize("offset", [1, 2, 3])
Expand Down
6 changes: 5 additions & 1 deletion tests/expr_and_series/head_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import pytest

import narwhals as nw
import narwhals as nw_main
import narwhals.stable.v1 as nw
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

Expand All @@ -18,6 +19,9 @@ def test_head(
expected = {"a": [1, 2]}
assert_equal_data(result, expected)

with pytest.deprecated_call():
df.select(nw_main.col("a").head(5))


@pytest.mark.parametrize("n", [2, -1])
def test_head_series(constructor_eager: ConstructorEager, n: int) -> None:
Expand Down
4 changes: 4 additions & 0 deletions tests/expr_and_series/sample_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest

import narwhals as nw_main
import narwhals.stable.v1 as nw
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data
Expand All @@ -18,6 +19,9 @@ def test_expr_sample(constructor_eager: ConstructorEager) -> None:
expected_series = (2,)
assert result_series == expected_series

with pytest.deprecated_call():
df.select(nw_main.col("a").sample(n=2))


def test_expr_sample_fraction(
constructor_eager: ConstructorEager, request: pytest.FixtureRequest
Expand Down
Loading

0 comments on commit 655616e

Please sign in to comment.