actually, keep unique and drop_nulls

narwhals-dev · Jan 15, 2025 · 655616e · 655616e
1 parent 0b75d85
commit 655616e
Show file tree

Hide file tree

Showing 6 changed files with 113 additions and 185 deletions.
diff --git a/narwhals/expr.py b/narwhals/expr.py
@@ -17,6 +17,7 @@
 from narwhals.expr_str import ExprStringNamespace
 from narwhals.utils import _validate_rolling_arguments
 from narwhals.utils import flatten
+from narwhals.utils import issue_deprecation_warning
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -2394,6 +2395,13 @@ def sample(
     ) -> Self:
         """Sample randomly from this expression.
 
+        !!! warning
+            `Expr.sample` is deprecated and will be removed in a future version.
+            Hint: instead of `df.select(nw.col('a').sample())`, use
+            `df.select(nw.col('a')).sample()` instead.
+            Note: this will remain available in `narwhals.stable.v1`.
+            See [stable api](../backcompat.md/) for more information.
+
         Arguments:
             n: Number of items to return. Cannot be used with fraction.
             fraction: Fraction of items to return. Cannot be used with n.
@@ -2403,54 +2411,14 @@ def sample(
 
         Returns:
             A new expression.
-
-        Examples:
-            >>> import pandas as pd
-            >>> import polars as pl
-            >>> import pyarrow as pa
-            >>> import narwhals as nw
-            >>> from narwhals.typing import IntoFrameT
-            >>>
-            >>> data = {"a": [1, 2, 3]}
-            >>> df_pd = pd.DataFrame(data)
-            >>> df_pl = pl.DataFrame(data)
-            >>> df_pa = pa.table(data)
-
-            Let's define a dataframe-agnostic function:
-
-            >>> def agnostic_sample(df_native: IntoFrameT) -> IntoFrameT:
-            ...     df = nw.from_native(df_native)
-            ...     return df.select(
-            ...         nw.col("a").sample(fraction=1.0, with_replacement=True)
-            ...     ).to_native()
-
-            We can then pass any supported library such as pandas, Polars, or
-            PyArrow to `agnostic_sample`:
-
-            >>> agnostic_sample(df_pd)  # doctest: +SKIP
-               a
-            2  3
-            0  1
-            2  3
-
-            >>> agnostic_sample(df_pl)  # doctest: +SKIP
-            shape: (3, 1)
-            ┌─────┐
-            │ a   │
-            │ --- │
-            │ f64 │
-            ╞═════╡
-            │ 2   │
-            │ 3   │
-            │ 3   │
-            └─────┘
-
-            >>> agnostic_sample(df_pa)  # doctest: +SKIP
-            pyarrow.Table
-            a: int64
-            ----
-            a: [[1,3,3]]
         """
+        msg = (
+            "`Expr.sample` is deprecated and will be removed in a future version.\n\n"
+            "Hint: instead of `df.select(nw.col('a').sample())`, use `df.select(nw.col('a')).sample()`.\n\n"
+            "Note: this will remain available in `narwhals.stable.v1`.\n"
+            "See [stable api](../backcompat.md/) for more information.\n"
+        )
+        issue_deprecation_warning(msg, _version="1.22.0")
         return self.__class__(
             lambda plx: self._to_compliant_expr(plx).sample(
                 n, fraction=fraction, with_replacement=with_replacement, seed=seed
@@ -2907,113 +2875,51 @@ def quantile(
     def head(self, n: int = 10) -> Self:
         r"""Get the first `n` rows.
 
+        !!! warning
+            `Expr.head` is deprecated and will be removed in a future version.
+            Hint: instead of `df.select(nw.col('a').head())`, use
+            `df.select(nw.col('a')).head()` instead.
+            Note: this will remain available in `narwhals.stable.v1`.
+            See [stable api](../backcompat.md/) for more information.
+
         Arguments:
             n: Number of rows to return.
 
         Returns:
             A new expression.
-
-        Examples:
-            >>> import pandas as pd
-            >>> import polars as pl
-            >>> import pyarrow as pa
-            >>> import narwhals as nw
-            >>> from narwhals.typing import IntoFrameT
-            >>>
-            >>> data = {"a": list(range(10))}
-            >>> df_pd = pd.DataFrame(data)
-            >>> df_pl = pl.DataFrame(data)
-            >>> df_pa = pa.table(data)
-
-            Let's define a dataframe-agnostic function that returns the first 3 rows:
-
-            >>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT:
-            ...     df = nw.from_native(df_native)
-            ...     return df.select(nw.col("a").head(3)).to_native()
-
-            We can then pass any supported library such as pandas, Polars, or
-            PyArrow to `agnostic_head`:
-
-            >>> agnostic_head(df_pd)
-               a
-            0  0
-            1  1
-            2  2
-
-            >>> agnostic_head(df_pl)
-            shape: (3, 1)
-            ┌─────┐
-            │ a   │
-            │ --- │
-            │ i64 │
-            ╞═════╡
-            │ 0   │
-            │ 1   │
-            │ 2   │
-            └─────┘
-
-            >>> agnostic_head(df_pa)
-            pyarrow.Table
-            a: int64
-            ----
-            a: [[0,1,2]]
         """
+        msg = (
+            "`Expr.head` is deprecated and will be removed in a future version.\n\n"
+            "Hint: instead of `df.select(nw.col('a').head())`, use `df.select(nw.col('a')).head()`.\n\n"
+            "Note: this will remain available in `narwhals.stable.v1`.\n"
+            "See [stable api](../backcompat.md/) for more information.\n"
+        )
+        issue_deprecation_warning(msg, _version="1.22.0")
         return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n))
 
     def tail(self, n: int = 10) -> Self:
         r"""Get the last `n` rows.
 
+        !!! warning
+            `Expr.tail` is deprecated and will be removed in a future version.
+            Hint: instead of `df.select(nw.col('a').tail())`, use
+            `df.select(nw.col('a')).tail()` instead.
+            Note: this will remain available in `narwhals.stable.v1`.
+            See [stable api](../backcompat.md/) for more information.
+
         Arguments:
             n: Number of rows to return.
 
         Returns:
             A new expression.
-
-        Examples:
-            >>> import pandas as pd
-            >>> import polars as pl
-            >>> import pyarrow as pa
-            >>> import narwhals as nw
-            >>> from narwhals.typing import IntoFrameT
-            >>>
-            >>> data = {"a": list(range(10))}
-            >>> df_pd = pd.DataFrame(data)
-            >>> df_pl = pl.DataFrame(data)
-            >>> df_pa = pa.table(data)
-
-            Let's define a dataframe-agnostic function that returns the last 3 rows:
-
-            >>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT:
-            ...     df = nw.from_native(df_native)
-            ...     return df.select(nw.col("a").tail(3)).to_native()
-
-            We can then pass any supported library such as pandas, Polars, or
-            PyArrow to `agnostic_tail`:
-
-            >>> agnostic_tail(df_pd)
-               a
-            7  7
-            8  8
-            9  9
-
-            >>> agnostic_tail(df_pl)
-            shape: (3, 1)
-            ┌─────┐
-            │ a   │
-            │ --- │
-            │ i64 │
-            ╞═════╡
-            │ 7   │
-            │ 8   │
-            │ 9   │
-            └─────┘
-
-            >>> agnostic_tail(df_pa)
-            pyarrow.Table
-            a: int64
-            ----
-            a: [[7,8,9]]
         """
+        msg = (
+            "`Expr.tail` is deprecated and will be removed in a future version.\n\n"
+            "Hint: instead of `df.select(nw.col('a').tail())`, use `df.select(nw.col('a')).tail()`.\n\n"
+            "Note: this will remain available in `narwhals.stable.v1`.\n"
+            "See [stable api](../backcompat.md/) for more information.\n"
+        )
+        issue_deprecation_warning(msg, _version="1.22.0")
         return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n))
 
     def round(self, decimals: int = 0) -> Self:
@@ -3141,57 +3047,27 @@ def len(self) -> Self:
     def gather_every(self: Self, n: int, offset: int = 0) -> Self:
         r"""Take every nth value in the Series and return as new Series.
 
+        !!! warning
+            `Expr.gather_every` is deprecated and will be removed in a future version.
+            Hint: instead of `df.select(nw.col('a').gather_every())`, use
+            `df.select(nw.col('a')).gather_every()` instead.
+            Note: this will remain available in `narwhals.stable.v1`.
+            See [stable api](../backcompat.md/) for more information.
+
         Arguments:
             n: Gather every *n*-th row.
             offset: Starting index.
 
         Returns:
             A new expression.
-
-        Examples:
-            >>> import pandas as pd
-            >>> import polars as pl
-            >>> import pyarrow as pa
-            >>> import narwhals as nw
-            >>> from narwhals.typing import IntoFrameT
-            >>>
-            >>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
-            >>> df_pd = pd.DataFrame(data)
-            >>> df_pl = pl.DataFrame(data)
-            >>> df_pa = pa.table(data)
-
-            Let's define a dataframe-agnostic function in which gather every 2 rows,
-            starting from a offset of 1:
-
-            >>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT:
-            ...     df = nw.from_native(df_native)
-            ...     return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native()
-
-            We can then pass any supported library such as pandas, Polars, or
-            PyArrow to `agnostic_gather_every`:
-
-            >>> agnostic_gather_every(df_pd)
-               a
-            1  2
-            3  4
-
-            >>> agnostic_gather_every(df_pl)
-            shape: (2, 1)
-            ┌─────┐
-            │ a   │
-            │ --- │
-            │ i64 │
-            ╞═════╡
-            │ 2   │
-            │ 4   │
-            └─────┘
-
-            >>> agnostic_gather_every(df_pa)
-            pyarrow.Table
-            a: int64
-            ----
-            a: [[2,4]]
         """
+        msg = (
+            "`Expr.gather_every` is deprecated and will be removed in a future version.\n\n"
+            "Hint: instead of `df.select(nw.col('a').gather_every())`, use `df.select(nw.col('a')).gather_every()`.\n\n"
+            "Note: this will remain available in `narwhals.stable.v1`.\n"
+            "See [stable api](../backcompat.md/) for more information.\n"
+        )
+        issue_deprecation_warning(msg, _version="1.22.0")
         return self.__class__(
             lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset)
         )

diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py
@@ -836,6 +836,42 @@ def rolling_std(
             ddof=ddof,
         )
 
+    def head(self, n: int = 10) -> Self:
+        r"""Get the first `n` rows.
+
+        Arguments:
+            n: Number of rows to return.
+
+        Returns:
+            A new expression.
+        """
+        return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n))
+
+    def tail(self, n: int = 10) -> Self:
+        r"""Get the last `n` rows.
+
+        Arguments:
+            n: Number of rows to return.
+
+        Returns:
+            A new expression.
+        """
+        return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n))
+
+    def gather_every(self: Self, n: int, offset: int = 0) -> Self:
+        r"""Take every nth value in the Series and return as new Series.
+
+        Arguments:
+            n: Gather every *n*-th row.
+            offset: Starting index.
+
+        Returns:
+            A new expression.
+        """
+        return self.__class__(
+            lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset)
+        )
+
 
 class Schema(NwSchema):
     """Ordered mapping of column names to their data type.

diff --git a/tests/expr_and_series/gather_every_test.py b/tests/expr_and_series/gather_every_test.py
@@ -2,6 +2,7 @@
 
 import pytest
 
+import narwhals as nw_main
 import narwhals.stable.v1 as nw
 from tests.utils import ConstructorEager
 from tests.utils import assert_equal_data
@@ -21,6 +22,9 @@ def test_gather_every_expr(
 
     assert_equal_data(result, expected)
 
+    with pytest.deprecated_call():
+        df.select(nw_main.col("a").gather_every(n=n, offset=offset))
+
 
 @pytest.mark.parametrize("n", [1, 2, 3])
 @pytest.mark.parametrize("offset", [1, 2, 3])

diff --git a/tests/expr_and_series/head_test.py b/tests/expr_and_series/head_test.py
@@ -2,7 +2,8 @@
 
 import pytest
 
-import narwhals as nw
+import narwhals as nw_main
+import narwhals.stable.v1 as nw
 from tests.utils import ConstructorEager
 from tests.utils import assert_equal_data
 
@@ -18,6 +19,9 @@ def test_head(
     expected = {"a": [1, 2]}
     assert_equal_data(result, expected)
 
+    with pytest.deprecated_call():
+        df.select(nw_main.col("a").head(5))
+
 
 @pytest.mark.parametrize("n", [2, -1])
 def test_head_series(constructor_eager: ConstructorEager, n: int) -> None:

diff --git a/tests/expr_and_series/sample_test.py b/tests/expr_and_series/sample_test.py
@@ -2,6 +2,7 @@
 
 import pytest
 
+import narwhals as nw_main
 import narwhals.stable.v1 as nw
 from tests.utils import ConstructorEager
 from tests.utils import assert_equal_data
@@ -18,6 +19,9 @@ def test_expr_sample(constructor_eager: ConstructorEager) -> None:
     expected_series = (2,)
     assert result_series == expected_series
 
+    with pytest.deprecated_call():
+        df.select(nw_main.col("a").sample(n=2))
+
 
 def test_expr_sample_fraction(
     constructor_eager: ConstructorEager, request: pytest.FixtureRequest