From 554517cd8b2b4c092ed025e8b194075f5348bab6 Mon Sep 17 00:00:00 2001 From: Lucas Nelson Date: Thu, 2 Jan 2025 04:07:48 -0600 Subject: [PATCH 1/5] feat: add `sum` and `sum_horizontal` for `SparkLike` (#1693) * feat: include sum method * feat: include sum_horizontal method * docs: update examples with PySpark * docs: remove pyspark references * feat: coalesce nulls to zero for addition * test: copy tests from expr_and_series suite * feat: handle NaN cases * chore: remove notebook * coalesce, sort in output * sumh sort, sum_expr constructor --------- Co-authored-by: FBruzzesi --- docs/installation.md | 4 +-- narwhals/_spark_like/expr.py | 8 +++++ narwhals/_spark_like/namespace.py | 27 +++++++++++++++++ tests/spark_like_test.py | 49 +++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 2 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index 1acc11774..a406b6295 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -91,5 +91,5 @@ Let's learn about what you just did, and what Narwhals can do for you! !!! info - These examples are using pandas, Polars and PyArrow, however Narwhals supports - other dataframe libraries (See [supported libraries](extending.md)). + These examples are using pandas, Polars, and PyArrow, however Narwhals + supports other dataframe libraries (See [supported libraries](extending.md)). 
diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 3d09a2427..4887e8001 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -233,6 +233,14 @@ def _min(_input: Column) -> Column: return self._from_call(_min, "min", returns_scalar=True) + def sum(self) -> Self: + def _sum(_input: Column) -> Column: + from pyspark.sql import functions as F # noqa: N812 + + return F.sum(_input) + + return self._from_call(_sum, "sum", returns_scalar=True) + def std(self: Self, ddof: int) -> Self: from functools import partial diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index d150e7541..d34867b00 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -66,3 +66,30 @@ def col(self, *column_names: str) -> SparkLikeExpr: return SparkLikeExpr.from_column_names( *column_names, backend_version=self._backend_version, version=self._version ) + + def sum_horizontal(self, *exprs: IntoSparkLikeExpr) -> SparkLikeExpr: + parsed_exprs = parse_into_exprs(*exprs, namespace=self) + + def func(df: SparkLikeLazyFrame) -> list[Column]: + import pyspark.sql.functions as F # noqa: N812 + + cols = [c for _expr in parsed_exprs for c in _expr(df)] + col_name = get_column_name(df, cols[0]) + return [ + reduce( + operator.add, + (F.coalesce(col, F.lit(0)) for col in cols), + ).alias(col_name) + ] + + return SparkLikeExpr( # type: ignore[abstract] + call=func, + depth=max(x._depth for x in parsed_exprs) + 1, + function_name="sum_horizontal", + root_names=combine_root_names(parsed_exprs), + output_names=reduce_output_names(parsed_exprs), + returns_scalar=False, + backend_version=self._backend_version, + version=self._version, + kwargs={"exprs": exprs}, + ) diff --git a/tests/spark_like_test.py b/tests/spark_like_test.py index 99682b8f7..44335c6d4 100644 --- a/tests/spark_like_test.py +++ b/tests/spark_like_test.py @@ -297,6 +297,45 @@ def test_allh_all(pyspark_constructor: Constructor) -> 
None: assert_equal_data(result, expected) +# copied from tests/expr_and_series/sum_horizontal_test.py +@pytest.mark.parametrize("col_expr", [nw.col("a"), "a"]) +def test_sumh(pyspark_constructor: Constructor, col_expr: Any) -> None: + data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + df = nw.from_native(pyspark_constructor(data)) + result = df.with_columns(horizontal_sum=nw.sum_horizontal(col_expr, nw.col("b"))) + expected = { + "a": [1, 3, 2], + "b": [4, 4, 6], + "z": [7.0, 8.0, 9.0], + "horizontal_sum": [5, 7, 8], + } + assert_equal_data(result, expected) + + +def test_sumh_nullable(pyspark_constructor: Constructor) -> None: + data = {"a": [1, 8, 3], "b": [4, 5, None], "idx": [0, 1, 2]} + expected = {"hsum": [5, 13, 3]} + + df = nw.from_native(pyspark_constructor(data)) + result = df.select("idx", hsum=nw.sum_horizontal("a", "b")).sort("idx").drop("idx") + assert_equal_data(result, expected) + + +def test_sumh_all(pyspark_constructor: Constructor) -> None: + data = {"a": [1, 2, 3], "b": [10, 20, 30]} + df = nw.from_native(pyspark_constructor(data)) + result = df.select(nw.sum_horizontal(nw.all())) + expected = { + "a": [11, 22, 33], + } + assert_equal_data(result, expected) + result = df.select(c=nw.sum_horizontal(nw.all())) + expected = { + "c": [11, 22, 33], + } + assert_equal_data(result, expected) + + # copied from tests/expr_and_series/count_test.py def test_count(pyspark_constructor: Constructor) -> None: data = {"a": [1, 3, 2], "b": [4, None, 6], "z": [7.0, None, None]} @@ -347,6 +386,16 @@ def test_expr_min_expr(pyspark_constructor: Constructor) -> None: assert_equal_data(result, expected) +# copied from tests/expr_and_series/min_test.py +@pytest.mark.parametrize("expr", [nw.col("a", "b", "z").sum(), nw.sum("a", "b", "z")]) +def test_expr_sum_expr(pyspark_constructor: Constructor, expr: nw.Expr) -> None: + data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + df = nw.from_native(pyspark_constructor(data)) + result = df.select(expr) + 
expected = {"a": [6], "b": [14], "z": [24.0]} + assert_equal_data(result, expected) + + # copied from tests/expr_and_series/std_test.py def test_std(pyspark_constructor: Constructor) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} From f58fc4377f1ba91987d1847d9d302c0581fd980f Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 2 Jan 2025 10:25:04 +0000 Subject: [PATCH 2/5] feat: show native object (if possible) in repr (#1702) --- narwhals/dataframe.py | 38 +++++------------- narwhals/series.py | 14 +------ narwhals/stable/v1/__init__.py | 11 ++--- narwhals/utils.py | 33 +++++++++++++++ tests/repr_test.py | 73 ++++++++++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 46 deletions(-) create mode 100644 tests/repr_test.py diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index b508ded18..33aa35a22 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -19,6 +19,7 @@ from narwhals.translate import to_native from narwhals.utils import find_stacklevel from narwhals.utils import flatten +from narwhals.utils import generate_repr from narwhals.utils import is_sequence_but_not_str from narwhals.utils import parse_version @@ -414,18 +415,7 @@ def __array__(self, dtype: Any = None, copy: bool | None = None) -> np.ndarray: return self._compliant_frame.__array__(dtype, copy=copy) def __repr__(self) -> str: # pragma: no cover - header = " Narwhals DataFrame " - length = len(header) - return ( - "┌" - + "─" * length - + "┐\n" - + f"|{header}|\n" - + "| Use `.to_native` to see native output |\n" - + "└" - + "─" * length - + "┘" - ) + return generate_repr("Narwhals DataFrame", self.to_native().__repr__()) def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: """Export a DataFrame via the Arrow PyCapsule Interface. 
@@ -3581,18 +3571,7 @@ def __init__( raise AssertionError(msg) def __repr__(self) -> str: # pragma: no cover - header = " Narwhals LazyFrame " - length = len(header) - return ( - "┌" - + "─" * length - + "┐\n" - + f"|{header}|\n" - + "| Use `.to_native` to see native output |\n" - + "└" - + "─" * length - + "┘" - ) + return generate_repr("Narwhals LazyFrame", self.to_native().__repr__()) @property def implementation(self) -> Implementation: @@ -3640,11 +3619,12 @@ def collect(self) -> DataFrame[Any]: ... } ... ) >>> lf = nw.from_native(lf_pl) - >>> lf - ┌───────────────────────────────────────┐ - | Narwhals LazyFrame | - | Use `.to_native` to see native output | - └───────────────────────────────────────┘ + >>> lf # doctest:+ELLIPSIS + ┌─────────────────────────────┐ + | Narwhals LazyFrame | + |-----------------------------| + |>> df = lf.group_by("a").agg(nw.all().sum()).collect() >>> df.to_native().sort("a") shape: (3, 3) diff --git a/narwhals/series.py b/narwhals/series.py index 8f15ff0ce..de0e64396 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -15,6 +15,7 @@ from narwhals.dtypes import _validate_dtype from narwhals.typing import IntoSeriesT from narwhals.utils import _validate_rolling_arguments +from narwhals.utils import generate_repr from narwhals.utils import parse_version if TYPE_CHECKING: @@ -404,18 +405,7 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se return function(self, *args, **kwargs) def __repr__(self) -> str: # pragma: no cover - header = " Narwhals Series " - length = len(header) - return ( - "┌" - + "─" * length - + "┐\n" - + f"|{header}|\n" - + "| Use `.to_native()` to see native output |\n" - + "└" - + "─" * length - + "┘" - ) + return generate_repr("Narwhals Series", self.to_native().__repr__()) def __len__(self) -> int: return len(self._compliant_series) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index f7705713f..22afc687d 100644 --- 
a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -430,11 +430,12 @@ def collect(self) -> DataFrame[Any]: ... } ... ) >>> lf = nw.from_native(lf_pl) - >>> lf - ┌───────────────────────────────────────┐ - | Narwhals LazyFrame | - | Use `.to_native` to see native output | - └───────────────────────────────────────┘ + >>> lf # doctest:+ELLIPSIS + ┌─────────────────────────────┐ + | Narwhals LazyFrame | + |-----------------------------| + |>> df = lf.group_by("a").agg(nw.all().sum()).collect() >>> df.to_native().sort("a") shape: (3, 3) diff --git a/narwhals/utils.py b/narwhals/utils.py index b6337cb8e..2125d46c4 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import re from enum import Enum from enum import auto @@ -960,3 +961,35 @@ def _validate_rolling_arguments( min_periods = window_size return window_size, min_periods + + +def generate_repr(header: str, native_repr: str) -> str: + try: + terminal_width = os.get_terminal_size().columns + except OSError: + terminal_width = 80 + native_lines = native_repr.splitlines() + max_native_width = max(len(line) for line in native_lines) + + if max_native_width + 2 < terminal_width: + length = max(max_native_width, len(header)) + output = f"┌{'─'*length}┐\n" + header_extra = length - len(header) + output += ( + f"|{' '*(header_extra//2)}{header}{' '*(header_extra//2 + header_extra%2)}|\n" + ) + output += f"|{'-'*(length)}|\n" + start_extra = (length - max_native_width) // 2 + end_extra = (length - max_native_width) // 2 + (length - max_native_width) % 2 + for line in native_lines: + output += f"|{' '*(start_extra)}{line}{' '*(end_extra + max_native_width - len(line))}|\n" + output += f"└{'─' * length}┘" + return output + + diff = 39 - len(header) + return ( + f"┌{'─' * (39)}┐\n" + f"|{' '*(diff//2)}{header}{' '*(diff//2+diff%2)}|\n" + "| Use `.to_native` to see native output |\n└" + f"{'─' * 39}┘" + ) diff --git a/tests/repr_test.py 
b/tests/repr_test.py new file mode 100644 index 000000000..40cd51dca --- /dev/null +++ b/tests/repr_test.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import pandas as pd +import pytest + +import narwhals.stable.v1 as nw + + +def test_repr() -> None: + duckdb = pytest.importorskip("duckdb") + df = pd.DataFrame({"a": [1, 2, 3], "b": ["fdaf", "fda", "cf"]}) + result = nw.from_native(df).__repr__() + expected = ( + "┌──────────────────┐\n" + "|Narwhals DataFrame|\n" + "|------------------|\n" + "| a b |\n" + "| 0 1 fdaf |\n" + "| 1 2 fda |\n" + "| 2 3 cf |\n" + "└──────────────────┘" + ) + assert result == expected + result = nw.from_native(df).lazy().__repr__() + expected = ( + "┌──────────────────┐\n" + "|Narwhals LazyFrame|\n" + "|------------------|\n" + "| a b |\n" + "| 0 1 fdaf |\n" + "| 1 2 fda |\n" + "| 2 3 cf |\n" + "└──────────────────┘" + ) + assert result == expected + result = nw.from_native(df)["a"].__repr__() + expected = ( + "┌─────────────────────┐\n" + "| Narwhals Series |\n" + "|---------------------|\n" + "|0 1 |\n" + "|1 2 |\n" + "|2 3 |\n" + "|Name: a, dtype: int64|\n" + "└─────────────────────┘" + ) + assert result == expected + result = nw.from_native(duckdb.table("df")).__repr__() + expected = ( + "┌───────────────────┐\n" + "|Narwhals DataFrame |\n" + "|-------------------|\n" + "|┌───────┬─────────┐|\n" + "|│ a │ b │|\n" + "|│ int64 │ varchar │|\n" + "|├───────┼─────────┤|\n" + "|│ 1 │ fdaf │|\n" + "|│ 2 │ fda │|\n" + "|│ 3 │ cf │|\n" + "|└───────┴─────────┘|\n" + "└───────────────────┘" + ) + assert result == expected + # Make something wider than the terminal size + df = pd.DataFrame({"a": [1, 2, 3], "b": ["fdaf" * 100, "fda", "cf"]}) + result = nw.from_native(duckdb.table("df")).__repr__() + expected = ( + "┌───────────────────────────────────────┐\n" + "| Narwhals DataFrame |\n" + "| Use `.to_native` to see native output |\n" + "└───────────────────────────────────────┘" + ) + assert result == expected From 
a03b3ec1ff985f49981a250151133a955b43b5b7 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 2 Jan 2025 10:26:05 +0000 Subject: [PATCH 3/5] test: allow to run tests for Polars[gpu] (#1698) --- CONTRIBUTING.md | 8 +++++--- tests/expr_and_series/sample_test.py | 18 +++++++----------- tests/expr_and_series/str/to_datetime_test.py | 5 +---- tests/group_by_test.py | 7 +------ tests/read_scan_test.py | 8 ++++---- tests/tpch_q1_test.py | 6 ++---- tests/utils.py | 12 ++++++++++-- 7 files changed, 30 insertions(+), 34 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b61ed663b..0f8a6eb0b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -133,9 +133,11 @@ If you add code that should be tested, please add tests. - To run unit tests and doctests at the same time, run `pytest tests narwhals --cov=narwhals --doctest-modules` - To run tests multiprocessed, you may also want to use [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) (optional) - To choose which backends to run tests with you, you can use the `--constructors` flag: - - to only run tests for pandas, Polars, and PyArrow, use `pytest --constructors=pandas,pyarrow,polars` - - to run tests for all CPU constructors, use `pytest --all-cpu-constructors` - - by default, tests run for pandas, pandas (PyArrow dtypes), PyArrow, and Polars. + - To only run tests for pandas, Polars, and PyArrow, use `pytest --constructors=pandas,pyarrow,polars` + - To run tests for all CPU constructors, use `pytest --all-cpu-constructors` + - By default, tests run for pandas, pandas (PyArrow dtypes), PyArrow, and Polars. 
+ - To run tests using `cudf.pandas`, run `NARWHALS_DEFAULT_CONSTRUCTORS=pandas python -m cudf.pandas -m pytest` + - To run tests using `polars[gpu]`, run `NARWHALS_POLARS_GPU=1 pytest --constructors=polars[lazy]` If you want to have less surprises when opening a PR, you can take advantage of [nox](https://nox.thea.codes/en/stable/index.html) to run the entire CI/CD test suite locally in your operating system. diff --git a/tests/expr_and_series/sample_test.py b/tests/expr_and_series/sample_test.py index e8985e561..009acc3c9 100644 --- a/tests/expr_and_series/sample_test.py +++ b/tests/expr_and_series/sample_test.py @@ -46,17 +46,13 @@ def test_sample_with_seed( size, n = 100, 10 df = nw.from_native(constructor({"a": list(range(size))})).lazy() expected = {"res1": [True], "res2": [False]} - result = ( - df.select( - seed1=nw.col("a").sample(n=n, seed=123), - seed2=nw.col("a").sample(n=n, seed=123), - seed3=nw.col("a").sample(n=n, seed=42), - ) - .select( - res1=(nw.col("seed1") == nw.col("seed2")).all(), - res2=(nw.col("seed1") == nw.col("seed3")).all(), - ) - .collect() + result = df.select( + seed1=nw.col("a").sample(n=n, seed=123), + seed2=nw.col("a").sample(n=n, seed=123), + seed3=nw.col("a").sample(n=n, seed=42), + ).select( + res1=(nw.col("seed1") == nw.col("seed2")).all(), + res2=(nw.col("seed1") == nw.col("seed3")).all(), ) assert_equal_data(result, expected) diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py index 8bab09559..388ef23db 100644 --- a/tests/expr_and_series/str/to_datetime_test.py +++ b/tests/expr_and_series/str/to_datetime_test.py @@ -130,10 +130,7 @@ def test_to_datetime_infer_fmt_from_date(constructor: Constructor) -> None: data = {"z": ["2020-01-01", "2020-01-02", None]} expected = [datetime(2020, 1, 1), datetime(2020, 1, 2), None] result = ( - nw.from_native(constructor(data)) - .lazy() - .select(nw.col("z").str.to_datetime()) - .collect() + 
nw.from_native(constructor(data)).lazy().select(nw.col("z").str.to_datetime()) ) assert_equal_data(result, {"z": expected}) diff --git a/tests/group_by_test.py b/tests/group_by_test.py index 188c17c76..a0a7bee41 100644 --- a/tests/group_by_test.py +++ b/tests/group_by_test.py @@ -31,9 +31,7 @@ def test_group_by_complex() -> None: assert_equal_data(result, expected) lf = nw.from_native(df_lazy).lazy() - result = nw.to_native( - lf.group_by("a").agg((nw.col("b") - nw.col("c").mean()).mean()).sort("a") - ) + result = lf.group_by("a").agg((nw.col("b") - nw.col("c").mean()).mean()).sort("a") assert_equal_data(result, expected) @@ -220,7 +218,6 @@ def test_group_by_simple_named(constructor: Constructor) -> None: b_min=nw.col("b").min(), b_max=nw.col("b").max(), ) - .collect() .sort("a") ) expected = { @@ -240,7 +237,6 @@ def test_group_by_simple_unnamed(constructor: Constructor) -> None: nw.col("b").min(), nw.col("c").max(), ) - .collect() .sort("a") ) expected = { @@ -260,7 +256,6 @@ def test_group_by_multiple_keys(constructor: Constructor) -> None: c_min=nw.col("c").min(), c_max=nw.col("c").max(), ) - .collect() .sort("a") ) expected = { diff --git a/tests/read_scan_test.py b/tests/read_scan_test.py index 89bbcdce8..dbb2cf624 100644 --- a/tests/read_scan_test.py +++ b/tests/read_scan_test.py @@ -60,7 +60,7 @@ def test_scan_csv( df = nw.from_native(constructor(data)) native_namespace = nw.get_native_namespace(df) result = nw.scan_csv(filepath, native_namespace=native_namespace) - assert_equal_data(result.collect(), data) + assert_equal_data(result, data) assert isinstance(result, nw.LazyFrame) @@ -74,7 +74,7 @@ def test_scan_csv_v1( df = nw_v1.from_native(constructor(data)) native_namespace = nw_v1.get_native_namespace(df) result = nw_v1.scan_csv(filepath, native_namespace=native_namespace) - assert_equal_data(result.collect(), data) + assert_equal_data(result, data) assert isinstance(result, nw_v1.LazyFrame) @@ -136,7 +136,7 @@ def test_scan_parquet( df = 
nw.from_native(constructor(data)) native_namespace = nw.get_native_namespace(df) result = nw.scan_parquet(filepath, native_namespace=native_namespace) - assert_equal_data(result.collect(), data) + assert_equal_data(result, data) assert isinstance(result, nw.LazyFrame) @@ -151,7 +151,7 @@ def test_scan_parquet_v1( df = nw_v1.from_native(constructor(data)) native_namespace = nw_v1.get_native_namespace(df) result = nw_v1.scan_parquet(filepath, native_namespace=native_namespace) - assert_equal_data(result.collect(), data) + assert_equal_data(result, data) assert isinstance(result, nw_v1.LazyFrame) diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index 3d762cbb9..fd2a7d24c 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -66,7 +66,6 @@ def test_q1(library: str, request: pytest.FixtureRequest) -> None: ) .sort(["l_returnflag", "l_linestatus"]) ) - result = query_result.collect().to_dict(as_series=False) expected = { "l_returnflag": ["A", "N", "N", "R"], "l_linestatus": ["F", "F", "O", "F"], @@ -89,7 +88,7 @@ def test_q1(library: str, request: pytest.FixtureRequest) -> None: "avg_disc": [0.05039473684210526, 0.02, 0.05537414965986395, 0.04507042253521127], "count_order": [76, 1, 147, 71], } - assert_equal_data(result, expected) + assert_equal_data(query_result, expected) @pytest.mark.parametrize( @@ -193,7 +192,6 @@ def test_q1_w_pandas_agg_generic_path() -> None: ) .sort(["l_returnflag", "l_linestatus"]) ) - result = query_result.collect().to_dict(as_series=False) expected = { "l_returnflag": ["A", "N", "N", "R"], "l_linestatus": ["F", "F", "O", "F"], @@ -216,4 +214,4 @@ def test_q1_w_pandas_agg_generic_path() -> None: "avg_disc": [0.05039473684210526, 0.02, 0.05537414965986395, 0.04507042253521127], "count_order": [76, 1, 147, 71], } - assert_equal_data(result, expected) + assert_equal_data(query_result, expected) diff --git a/tests/utils.py b/tests/utils.py index 60933046b..8ad8ee03e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,6 
+1,7 @@ from __future__ import annotations import math +import os import sys import warnings from typing import Any @@ -69,10 +70,17 @@ def _sort_dict_by_key( def assert_equal_data(result: Any, expected: dict[str, Any]) -> None: is_pyspark = ( hasattr(result, "_compliant_frame") - and result._compliant_frame._implementation is Implementation.PYSPARK + and result.implementation is Implementation.PYSPARK ) + if hasattr(result, "collect"): - result = result.collect() + if result.implementation is Implementation.POLARS and os.environ.get( + "NARWHALS_POLARS_GPU", False + ): # pragma: no cover + result = result.to_native().collect(engine="gpu") + else: + result = result.collect() + if hasattr(result, "columns"): for key in result.columns: assert key in expected, (key, expected) From 9c3aa53242a32160021f8b6f4d58ca6413f7ff12 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 2 Jan 2025 10:26:30 +0000 Subject: [PATCH 4/5] chore: use `None` instead of `float('nan')` to check for null values in tests (#1697) --- tests/expr_and_series/fill_null_test.py | 8 ++-- tests/expr_and_series/max_horizontal_test.py | 2 +- tests/expr_and_series/mean_horizontal_test.py | 2 +- tests/expr_and_series/min_horizontal_test.py | 2 +- tests/expr_and_series/rolling_mean_test.py | 13 ++++--- tests/expr_and_series/rolling_std_test.py | 31 +++++++++++---- tests/expr_and_series/rolling_sum_test.py | 12 +++--- tests/expr_and_series/rolling_var_test.py | 8 ++-- tests/expr_and_series/skew_test.py | 4 +- tests/expr_and_series/unary_test.py | 38 ++++++++++--------- tests/expr_and_series/when_test.py | 11 +++--- tests/frame/drop_nulls_test.py | 4 +- tests/frame/join_test.py | 18 ++++----- tests/frame/pivot_test.py | 4 +- tests/frame/sort_test.py | 4 +- tests/group_by_test.py | 2 +- tests/hypothesis/join_test.py | 4 +- tests/spark_like_test.py | 16 ++++---- tests/utils.py | 8 +++- 19 files changed, 107 insertions(+), 84 deletions(-) diff --git a/tests/expr_and_series/fill_null_test.py 
b/tests/expr_and_series/fill_null_test.py index 32e4b9cdd..57f767d4d 100644 --- a/tests/expr_and_series/fill_null_test.py +++ b/tests/expr_and_series/fill_null_test.py @@ -136,7 +136,7 @@ def test_fill_null_limits(constructor: Constructor) -> None: nw.col("a", "b").fill_null(strategy="forward", limit=2) ) expected_forward = { - "a": [1, 1, 1, float("nan"), 5, 6, 6, 6, float("nan"), 10], + "a": [1, 1, 1, None, 5, 6, 6, 6, None, 10], "b": ["a", "a", "a", None, "b", "c", "c", "c", None, "d"], } assert_equal_data(result_forward, expected_forward) @@ -146,7 +146,7 @@ def test_fill_null_limits(constructor: Constructor) -> None: ) expected_backward = { - "a": [1, float("nan"), 5, 5, 5, 6, float("nan"), 10, 10, 10], + "a": [1, None, 5, 5, 5, 6, None, 10, 10, 10], "b": ["a", None, "b", "b", "b", "c", None, "d", "d", "d"], } assert_equal_data(result_backward, expected_backward) @@ -203,7 +203,7 @@ def test_fill_null_series_limits(constructor_eager: ConstructorEager) -> None: "ignore", message="The 'downcast' keyword in fillna is deprecated" ) expected_forward = { - "a_forward": [0.0, 1, 1, float("nan"), 2, 2, float("nan"), 3], + "a_forward": [0.0, 1, 1, None, 2, 2, None, 3], "b_forward": ["", "a", "a", None, "c", "c", None, "e"], } result_forward = df.select( @@ -214,7 +214,7 @@ def test_fill_null_series_limits(constructor_eager: ConstructorEager) -> None: assert_equal_data(result_forward, expected_forward) expected_backward = { - "a_backward": [0.0, 1, float("nan"), 2, 2, float("nan"), 3, 3], + "a_backward": [0.0, 1, None, 2, 2, None, 3, 3], "b_backward": ["", "a", None, "c", "c", None, "e", "e"], } diff --git a/tests/expr_and_series/max_horizontal_test.py b/tests/expr_and_series/max_horizontal_test.py index 3becb36be..c86e11318 100644 --- a/tests/expr_and_series/max_horizontal_test.py +++ b/tests/expr_and_series/max_horizontal_test.py @@ -9,7 +9,7 @@ from tests.utils import assert_equal_data data = {"a": [1, 3, None, None], "b": [4, None, 6, None], "z": [3, 1, None, None]} 
-expected_values = [4, 3, 6, float("nan")] +expected_values = [4, 3, 6, None] @pytest.mark.parametrize("col_expr", [nw.col("a"), "a"]) diff --git a/tests/expr_and_series/mean_horizontal_test.py b/tests/expr_and_series/mean_horizontal_test.py index 31b4b2109..485bf1750 100644 --- a/tests/expr_and_series/mean_horizontal_test.py +++ b/tests/expr_and_series/mean_horizontal_test.py @@ -14,7 +14,7 @@ def test_meanh(constructor: Constructor, col_expr: Any) -> None: data = {"a": [1, 3, None, None], "b": [4, None, 6, None]} df = nw.from_native(constructor(data)) result = df.select(horizontal_mean=nw.mean_horizontal(col_expr, nw.col("b"))) - expected = {"horizontal_mean": [2.5, 3.0, 6.0, float("nan")]} + expected = {"horizontal_mean": [2.5, 3.0, 6.0, None]} assert_equal_data(result, expected) diff --git a/tests/expr_and_series/min_horizontal_test.py b/tests/expr_and_series/min_horizontal_test.py index 5fb7fce97..787e3e2a4 100644 --- a/tests/expr_and_series/min_horizontal_test.py +++ b/tests/expr_and_series/min_horizontal_test.py @@ -9,7 +9,7 @@ from tests.utils import assert_equal_data data = {"a": [1, 3, None, None], "b": [4, None, 6, None], "z": [3, 1, None, None]} -expected_values = [1, 1, 6, float("nan")] +expected_values = [1, 1, 6, None] @pytest.mark.parametrize("col_expr", [nw.col("a"), "a"]) diff --git a/tests/expr_and_series/rolling_mean_test.py b/tests/expr_and_series/rolling_mean_test.py index 33c817bf3..a6dd41935 100644 --- a/tests/expr_and_series/rolling_mean_test.py +++ b/tests/expr_and_series/rolling_mean_test.py @@ -1,6 +1,7 @@ from __future__ import annotations import random +from typing import Any import hypothesis.strategies as st import pandas as pd @@ -16,15 +17,15 @@ data = {"a": [None, 1, 2, None, 4, 6, 11]} -kwargs_and_expected = { - "x1": {"kwargs": {"window_size": 3}, "expected": [float("nan")] * 6 + [7.0]}, +kwargs_and_expected: dict[str, dict[str, Any]] = { + "x1": {"kwargs": {"window_size": 3}, "expected": [None] * 6 + [7.0]}, "x2": { "kwargs": 
{"window_size": 3, "min_periods": 1}, - "expected": [float("nan"), 1.0, 1.5, 1.5, 3.0, 5.0, 7.0], + "expected": [None, 1.0, 1.5, 1.5, 3.0, 5.0, 7.0], }, "x3": { "kwargs": {"window_size": 2, "min_periods": 1}, - "expected": [float("nan"), 1.0, 1.5, 2.0, 4.0, 5.0, 8.5], + "expected": [None, 1.0, 1.5, 2.0, 4.0, 5.0, 8.5], }, "x4": { "kwargs": {"window_size": 5, "min_periods": 1, "center": True}, @@ -52,7 +53,7 @@ def test_rolling_mean_expr( df = nw.from_native(constructor(data)) result = df.select( **{ - name: nw.col("a").rolling_mean(**values["kwargs"]) # type: ignore[arg-type] + name: nw.col("a").rolling_mean(**values["kwargs"]) for name, values in kwargs_and_expected.items() } ) @@ -69,7 +70,7 @@ def test_rolling_mean_series(constructor_eager: ConstructorEager) -> None: result = df.select( **{ - name: df["a"].rolling_mean(**values["kwargs"]) # type: ignore[arg-type] + name: df["a"].rolling_mean(**values["kwargs"]) for name, values in kwargs_and_expected.items() } ) diff --git a/tests/expr_and_series/rolling_std_test.py b/tests/expr_and_series/rolling_std_test.py index 3fdba9493..b937f8430 100644 --- a/tests/expr_and_series/rolling_std_test.py +++ b/tests/expr_and_series/rolling_std_test.py @@ -1,8 +1,8 @@ from __future__ import annotations +from math import sqrt from typing import Any -import numpy as np import pytest import narwhals.stable.v1 as nw @@ -17,32 +17,49 @@ { "name": "x1", "kwargs": {"window_size": 3}, - "expected": np.sqrt([float("nan"), float("nan"), 1 / 3, 1, 4 / 3, 7 / 3, 3]), + "expected": [ + sqrt(x) if x is not None else x + for x in [None, None, 1 / 3, 1, 4 / 3, 7 / 3, 3] + ], }, { "name": "x2", "kwargs": {"window_size": 3, "min_periods": 1}, - "expected": np.sqrt([float("nan"), 0.5, 1 / 3, 1.0, 4 / 3, 7 / 3, 3]), + "expected": [ + sqrt(x) if x is not None else x + for x in [None, 0.5, 1 / 3, 1.0, 4 / 3, 7 / 3, 3] + ], }, { "name": "x3", "kwargs": {"window_size": 2, "min_periods": 1}, - "expected": np.sqrt([float("nan"), 0.5, 0.5, 2.0, 2.0, 4.5, 
4.5]), + "expected": [ + sqrt(x) if x is not None else x for x in [None, 0.5, 0.5, 2.0, 2.0, 4.5, 4.5] + ], }, { "name": "x4", "kwargs": {"window_size": 5, "min_periods": 1, "center": True}, - "expected": np.sqrt([1 / 3, 11 / 12, 4 / 5, 17 / 10, 2.0, 2.25, 3]), + "expected": [ + sqrt(x) if x is not None else x + for x in [1 / 3, 11 / 12, 4 / 5, 17 / 10, 2.0, 2.25, 3] + ], }, { "name": "x5", "kwargs": {"window_size": 4, "min_periods": 1, "center": True}, - "expected": np.sqrt([0.5, 1 / 3, 11 / 12, 11 / 12, 2.25, 2.25, 3]), + "expected": [ + sqrt(x) if x is not None else x + for x in [0.5, 1 / 3, 11 / 12, 11 / 12, 2.25, 2.25, 3] + ], }, { "name": "x6", "kwargs": {"window_size": 3, "ddof": 2}, - "expected": np.sqrt([float("nan"), float("nan"), 2 / 3, 2.0, 8 / 3, 14 / 3, 6.0]), + "expected": [ + sqrt(x) if x is not None else x + for x in [None, None, 2 / 3, 2.0, 8 / 3, 14 / 3, 6.0] + ], }, ) diff --git a/tests/expr_and_series/rolling_sum_test.py b/tests/expr_and_series/rolling_sum_test.py index fae22552b..8c4537e49 100644 --- a/tests/expr_and_series/rolling_sum_test.py +++ b/tests/expr_and_series/rolling_sum_test.py @@ -18,15 +18,15 @@ data = {"a": [None, 1, 2, None, 4, 6, 11]} -kwargs_and_expected = { - "x1": {"kwargs": {"window_size": 3}, "expected": [float("nan")] * 6 + [21]}, +kwargs_and_expected: dict[str, dict[str, Any]] = { + "x1": {"kwargs": {"window_size": 3}, "expected": [None] * 6 + [21]}, "x2": { "kwargs": {"window_size": 3, "min_periods": 1}, - "expected": [float("nan"), 1.0, 3.0, 3.0, 6.0, 10.0, 21.0], + "expected": [None, 1.0, 3.0, 3.0, 6.0, 10.0, 21.0], }, "x3": { "kwargs": {"window_size": 2, "min_periods": 1}, - "expected": [float("nan"), 1.0, 3.0, 2.0, 4.0, 10.0, 17.0], + "expected": [None, 1.0, 3.0, 2.0, 4.0, 10.0, 17.0], }, "x4": { "kwargs": {"window_size": 5, "min_periods": 1, "center": True}, @@ -54,7 +54,7 @@ def test_rolling_sum_expr( df = nw.from_native(constructor(data)) result = df.select( **{ - name: 
nw.col("a").rolling_sum(**values["kwargs"]) # type: ignore[arg-type] + name: nw.col("a").rolling_sum(**values["kwargs"]) for name, values in kwargs_and_expected.items() } ) @@ -71,7 +71,7 @@ def test_rolling_sum_series(constructor_eager: ConstructorEager) -> None: result = df.select( **{ - name: df["a"].rolling_sum(**values["kwargs"]) # type: ignore[arg-type] + name: df["a"].rolling_sum(**values["kwargs"]) for name, values in kwargs_and_expected.items() } ) diff --git a/tests/expr_and_series/rolling_var_test.py b/tests/expr_and_series/rolling_var_test.py index 32767c990..37475e76a 100644 --- a/tests/expr_and_series/rolling_var_test.py +++ b/tests/expr_and_series/rolling_var_test.py @@ -23,17 +23,17 @@ { "name": "x1", "kwargs": {"window_size": 3}, - "expected": [float("nan"), float("nan"), 1 / 3, 1, 4 / 3, 7 / 3, 3], + "expected": [None, None, 1 / 3, 1, 4 / 3, 7 / 3, 3], }, { "name": "x2", "kwargs": {"window_size": 3, "min_periods": 1}, - "expected": [float("nan"), 0.5, 1 / 3, 1.0, 4 / 3, 7 / 3, 3], + "expected": [None, 0.5, 1 / 3, 1.0, 4 / 3, 7 / 3, 3], }, { "name": "x3", "kwargs": {"window_size": 2, "min_periods": 1}, - "expected": [float("nan"), 0.5, 0.5, 2.0, 2.0, 4.5, 4.5], + "expected": [None, 0.5, 0.5, 2.0, 2.0, 4.5, 4.5], }, { "name": "x4", @@ -48,7 +48,7 @@ { "name": "x6", "kwargs": {"window_size": 3, "ddof": 2}, - "expected": [float("nan"), float("nan"), 2 / 3, 2.0, 8 / 3, 14 / 3, 6.0], + "expected": [None, None, 2 / 3, 2.0, 8 / 3, 14 / 3, 6.0], }, ) diff --git a/tests/expr_and_series/skew_test.py b/tests/expr_and_series/skew_test.py index b2029d08e..849496807 100644 --- a/tests/expr_and_series/skew_test.py +++ b/tests/expr_and_series/skew_test.py @@ -13,9 +13,9 @@ ("data", "expected"), [ ([], None), - ([1], float("nan")), + ([1], None), ([1, 2], 0.0), - ([0.0, 0.0, 0.0], float("nan")), + ([0.0, 0.0, 0.0], None), ([1, 2, 3, 2, 1], 0.343622), ], ) diff --git a/tests/expr_and_series/unary_test.py b/tests/expr_and_series/unary_test.py index 
3a580b726..f2f9c33ff 100644 --- a/tests/expr_and_series/unary_test.py +++ b/tests/expr_and_series/unary_test.py @@ -93,7 +93,7 @@ def test_unary_two_elements(constructor: Constructor) -> None: "b_nunique": [2], "b_skew": [0.0], "c_nunique": [2], - "c_skew": [float("nan")], + "c_skew": [None], } assert_equal_data(result, expected) @@ -115,21 +115,23 @@ def test_unary_two_elements_series(constructor_eager: ConstructorEager) -> None: "b_nunique": [2], "b_skew": [0.0], "c_nunique": [2], - "c_skew": [float("nan")], + "c_skew": [None], } assert_equal_data(result, expected) def test_unary_one_element(constructor: Constructor) -> None: - data = {"a": [1], "b": [2], "c": [float("nan")]} + data = {"a": [1], "b": [2], "c": [None]} # Dask runs into a divide by zero RuntimeWarning for 1 element skew. context = ( pytest.warns(RuntimeWarning, match="invalid value encountered in scalar divide") if "dask" in str(constructor) else does_not_raise() ) - with context: - result = nw.from_native(constructor(data)).select( + result = ( + nw.from_native(constructor(data)) + .with_columns(nw.col("c").cast(nw.Float64)) + .select( a_nunique=nw.col("a").n_unique(), a_skew=nw.col("a").skew(), b_nunique=nw.col("b").n_unique(), @@ -137,19 +139,21 @@ def test_unary_one_element(constructor: Constructor) -> None: c_nunique=nw.col("c").n_unique(), c_skew=nw.col("c").skew(), ) - expected = { - "a_nunique": [1], - "a_skew": [float("nan")], - "b_nunique": [1], - "b_skew": [float("nan")], - "c_nunique": [1], - "c_skew": [float("nan")], - } + ) + expected = { + "a_nunique": [1], + "a_skew": [None], + "b_nunique": [1], + "b_skew": [None], + "c_nunique": [1], + "c_skew": [None], + } + with context: assert_equal_data(result, expected) def test_unary_one_element_series(constructor_eager: ConstructorEager) -> None: - data = {"a": [1], "b": [2], "c": [float("nan")]} + data = {"a": [1], "b": [2], "c": [None]} df = nw.from_native(constructor_eager(data)) result = { "a_nunique": [df["a"].n_unique()], @@ -161,10 
+165,10 @@ def test_unary_one_element_series(constructor_eager: ConstructorEager) -> None: } expected = { "a_nunique": [1], - "a_skew": [float("nan")], + "a_skew": [None], "b_nunique": [1], - "b_skew": [float("nan")], + "b_skew": [None], "c_nunique": [1], - "c_skew": [float("nan")], + "c_skew": [None], } assert_equal_data(result, expected) diff --git a/tests/expr_and_series/when_test.py b/tests/expr_and_series/when_test.py index 3cef177fa..8648ae4fb 100644 --- a/tests/expr_and_series/when_test.py +++ b/tests/expr_and_series/when_test.py @@ -21,7 +21,7 @@ def test_when(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(nw.when(nw.col("a") == 1).then(value=3).alias("a_when")) expected = { - "a_when": [3, np.nan, np.nan], + "a_when": [3, None, None], } assert_equal_data(result, expected) @@ -41,7 +41,7 @@ def test_multiple_conditions(constructor: Constructor) -> None: nw.when(nw.col("a") < 3, nw.col("c") < 5.0).then(3).alias("a_when") ) expected = { - "a_when": [3, np.nan, np.nan], + "a_when": [3, None, None], } assert_equal_data(result, expected) @@ -65,7 +65,7 @@ def test_value_numpy_array( nw.when(nw.col("a") == 1).then(np.asanyarray([3, 4, 5])).alias("a_when") ) expected = { - "a_when": [3, np.nan, np.nan], + "a_when": [3, None, None], } assert_equal_data(result, expected) @@ -77,7 +77,7 @@ def test_value_series(constructor_eager: ConstructorEager) -> None: assert isinstance(s, nw.Series) result = df.select(nw.when(nw.col("a") == 1).then(s).alias("a_when")) expected = { - "a_when": [3, np.nan, np.nan], + "a_when": [3, None, None], } assert_equal_data(result, expected) @@ -86,7 +86,7 @@ def test_value_expression(constructor: Constructor) -> None: df = nw.from_native(constructor(data)) result = df.select(nw.when(nw.col("a") == 1).then(nw.col("a") + 9).alias("a_when")) expected = { - "a_when": [10, np.nan, np.nan], + "a_when": [10, None, None], } assert_equal_data(result, expected) @@ -98,7 +98,6 @@ def 
test_otherwise_numpy_array( request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) - import numpy as np result = df.select( nw.when(nw.col("a") == 1).then(-1).otherwise(np.array([0, 9, 10])).alias("a_when") diff --git a/tests/frame/drop_nulls_test.py b/tests/frame/drop_nulls_test.py index 680cbd4c4..bb55439eb 100644 --- a/tests/frame/drop_nulls_test.py +++ b/tests/frame/drop_nulls_test.py @@ -24,8 +24,8 @@ def test_drop_nulls(constructor: Constructor) -> None: @pytest.mark.parametrize( ("subset", "expected"), [ - ("a", {"a": [1, 2.0, 4.0], "b": [float("nan"), 3.0, 5.0]}), - (["a"], {"a": [1, 2.0, 4.0], "b": [float("nan"), 3.0, 5.0]}), + ("a", {"a": [1, 2.0, 4.0], "b": [None, 3.0, 5.0]}), + (["a"], {"a": [1, 2.0, 4.0], "b": [None, 3.0, 5.0]}), (["a", "b"], {"a": [2.0, 4.0], "b": [3.0, 5.0]}), ], ) diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index c743893d0..1abe2b90f 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -235,22 +235,20 @@ def test_left_join(constructor: Constructor) -> None: } df_left = nw.from_native(constructor(data_left)) df_right = nw.from_native(constructor(data_right)) - result = df_left.join(df_right, left_on="bob", right_on="co", how="left").select( # type: ignore[arg-type] - nw.all().fill_null(float("nan")) - ) + result = df_left.join(df_right, left_on="bob", right_on="co", how="left") # type: ignore[arg-type] result = result.sort("index") result = result.drop("index_right") expected = { "antananarivo": [1, 2, 3], "bob": [4, 5, 6], - "antananarivo_right": [1, 2, float("nan")], + "antananarivo_right": [1, 2, None], "index": [0, 1, 2], } result_on_list = df_left.join( df_right, # type: ignore[arg-type] on=["antananarivo", "index"], how="left", - ).select(nw.all().fill_null(float("nan"))) + ) result_on_list = result_on_list.sort("index") expected_on_list = { "antananarivo": [1, 2, 3], @@ -312,15 +310,15 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None: 
left_on="antananarivo", right_on="d", how="left", - ).select(nw.all().fill_null(float("nan"))) + ) result = result.sort("index") result = result.drop("index_right") expected = { "antananarivo": [1, 2, 3], "bob": [4, 5, 6], "d": [1, 4, 2], - "antananarivo_right": [1.0, 3.0, float("nan")], - "c": [4.0, 6.0, float("nan")], + "antananarivo_right": [1.0, 3.0, None], + "c": [4.0, 6.0, None], "index": [0, 1, 2], } assert_equal_data(result, expected) @@ -397,7 +395,7 @@ def test_joinasof_numeric( expected_forward = { "antananarivo": [1, 5, 10], "val": ["a", "b", "c"], - "val_right": [1, 6, float("nan")], + "val_right": [1, 6, None], } expected_nearest = { "antananarivo": [1, 5, 10], @@ -523,7 +521,7 @@ def test_joinasof_by( "antananarivo": [1, 5, 7, 10], "bob": ["D", "D", "C", "A"], "c": [9, 2, 1, 1], - "d": [1, 3, float("nan"), 4], + "d": [1, 3, None, 4], } assert_equal_data(result, expected) assert_equal_data(result_by, expected) diff --git a/tests/frame/pivot_test.py b/tests/frame/pivot_test.py index 98ef7466f..0e3860292 100644 --- a/tests/frame/pivot_test.py +++ b/tests/frame/pivot_test.py @@ -271,7 +271,7 @@ def test_pivot_no_index( expected = { "ix": [1, 1, 2, 2], "bar": ["x", "y", "w", "z"], - "a": [1.0, float("nan"), float("nan"), 3.0], - "b": [float("nan"), 2.0, 4.0, float("nan")], + "a": [1.0, None, None, 3.0], + "b": [None, 2.0, 4.0, None], } assert_equal_data(result, expected) diff --git a/tests/frame/sort_test.py b/tests/frame/sort_test.py index 4e12cc95a..5147c6f56 100644 --- a/tests/frame/sort_test.py +++ b/tests/frame/sort_test.py @@ -29,8 +29,8 @@ def test_sort(constructor: Constructor) -> None: @pytest.mark.parametrize( ("nulls_last", "expected"), [ - (True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, float("nan")]}), - (False, {"a": [-1, 0, 2, 0], "b": [float("nan"), 3, 2, 1]}), + (True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, None]}), + (False, {"a": [-1, 0, 2, 0], "b": [None, 3, 2, 1]}), ], ) def test_sort_nulls( diff --git a/tests/group_by_test.py 
b/tests/group_by_test.py index a0a7bee41..3c57ce027 100644 --- a/tests/group_by_test.py +++ b/tests/group_by_test.py @@ -288,7 +288,7 @@ def test_key_with_nulls( .sort("a") .with_columns(nw.col("b").cast(nw.Float64)) ) - expected = {"b": [4.0, 5, float("nan")], "len": [1, 1, 1], "a": [1, 2, 3]} + expected = {"b": [4.0, 5, None], "len": [1, 1, 1], "a": [1, 2, 3]} assert_equal_data(result, expected) diff --git a/tests/hypothesis/join_test.py b/tests/hypothesis/join_test.py index 5b498db65..7f1cd8103 100644 --- a/tests/hypothesis/join_test.py +++ b/tests/hypothesis/join_test.py @@ -161,7 +161,7 @@ def test_left_join( # pragma: no cover left_on=left_key, right_on=right_key, ) - ).select(pl.all().fill_null(float("nan"))) + ) assert_equal_data( result_pd.to_dict(as_series=False), result_pl.to_dict(as_series=False) ) @@ -174,7 +174,7 @@ def test_left_join( # pragma: no cover left_on=left_key, right_on=right_key, ) - .select(nw.all().cast(nw.Float64).fill_null(float("nan"))) + .select(nw.all().cast(nw.Float64)) .pipe(lambda df: df.sort(df.columns)) ) assert_equal_data( diff --git a/tests/spark_like_test.py b/tests/spark_like_test.py index 44335c6d4..27777139c 100644 --- a/tests/spark_like_test.py +++ b/tests/spark_like_test.py @@ -235,8 +235,8 @@ def test_sort(pyspark_constructor: Constructor) -> None: @pytest.mark.parametrize( ("nulls_last", "expected"), [ - (True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, float("nan")]}), - (False, {"a": [-1, 0, 2, 0], "b": [float("nan"), 3, 2, 1]}), + (True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, None]}), + (False, {"a": [-1, 0, 2, 0], "b": [None, 3, 2, 1]}), ], ) def test_sort_nulls( @@ -338,7 +338,7 @@ def test_sumh_all(pyspark_constructor: Constructor) -> None: # copied from tests/expr_and_series/count_test.py def test_count(pyspark_constructor: Constructor) -> None: - data = {"a": [1, 3, 2], "b": [4, None, 6], "z": [7.0, None, None]} + data = {"a": [1, 2, 3], "b": [4, None, 6], "z": [7.0, None, None]} df = 
nw.from_native(pyspark_constructor(data)) result = df.select(nw.col("a", "b", "z").count()) expected = {"a": [3], "b": [2], "z": [1]} @@ -560,8 +560,8 @@ def test_drop_nulls(pyspark_constructor: Constructor) -> None: @pytest.mark.parametrize( ("subset", "expected"), [ - ("a", {"a": [1, 2.0, 4.0], "b": [float("nan"), 3.0, 5.0]}), - (["a"], {"a": [1, 2.0, 4.0], "b": [float("nan"), 3.0, 5.0]}), + ("a", {"a": [1, 2.0, 4.0], "b": [None, 3.0, 5.0]}), + (["a"], {"a": [1, 2.0, 4.0], "b": [None, 3.0, 5.0]}), (["a", "b"], {"a": [2.0, 4.0], "b": [3.0, 5.0]}), ], ) @@ -831,7 +831,7 @@ def test_left_join(pyspark_constructor: Constructor) -> None: expected = { "antananarivo": [1, 2, 3], "bob": [4, 5, 6], - "antananarivo_right": [1, 2, float("nan")], + "antananarivo_right": [1, 2, None], "idx": [0, 1, 2], } result_on_list = df_left.join( @@ -912,8 +912,8 @@ def test_left_join_overlapping_column(pyspark_constructor: Constructor) -> None: "antananarivo": [1, 2, 3], "bob": [4, 5, 6], "d": [1, 4, 2], - "antananarivo_right": [1.0, 3.0, float("nan")], - "c": [4.0, 6.0, float("nan")], + "antananarivo_right": [1.0, 3.0, None], + "c": [4.0, 6.0, None], "idx": [0, 1, 2], } assert_equal_data(result, expected) diff --git a/tests/utils.py b/tests/utils.py index 8ad8ee03e..e7c9c7d89 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -98,8 +98,12 @@ def assert_equal_data(result: Any, expected: dict[str, Any]) -> None: for i, (lhs, rhs) in enumerate(zip_strict(result_value, expected_value)): if isinstance(lhs, float) and not math.isnan(lhs): are_equivalent_values = math.isclose(lhs, rhs, rel_tol=0, abs_tol=1e-6) - elif isinstance(lhs, float) and math.isnan(lhs) and rhs is not None: - are_equivalent_values = math.isnan(rhs) # pragma: no cover + elif isinstance(lhs, float) and math.isnan(lhs): + are_equivalent_values = rhs is None or math.isnan(rhs) + elif isinstance(rhs, float) and math.isnan(rhs): + are_equivalent_values = lhs is None or math.isnan(lhs) + elif lhs is None: + 
are_equivalent_values = rhs is None elif pd.isna(lhs): are_equivalent_values = pd.isna(rhs) else: From 44d449df787dd90b75a65b959d4de9497996500b Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Thu, 2 Jan 2025 11:30:53 +0100 Subject: [PATCH 5/5] docs: `Series` method' docstrings (#1699) --- narwhals/dataframe.py | 5 +- narwhals/series.py | 2682 +++++++++++++++++++++++--------- narwhals/stable/v1/__init__.py | 104 +- 3 files changed, 1995 insertions(+), 796 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 33aa35a22..0e401d464 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -3577,9 +3577,8 @@ def __repr__(self) -> str: # pragma: no cover def implementation(self) -> Implementation: """Return implementation of native frame. - This can be useful when you need to some special-casing for - some libraries for features outside of Narwhals' scope - for - example, when dealing with pandas' Period Dtype. + This can be useful when you need to use special-casing for features outside of + Narwhals' scope - for example, when dealing with pandas' Period Dtype. Returns: Implementation. diff --git a/narwhals/series.py b/narwhals/series.py index de0e64396..4203cdb74 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -79,9 +79,8 @@ def __init__( def implementation(self) -> Implementation: """Return implementation of native Series. - This can be useful when you need to some special-casing for - some libraries for features outside of Narwhals' scope - for - example, when dealing with pandas' Period Dtype. + This can be useful when you need to use special-casing for features outside of + Narwhals' scope - for example, when dealing with pandas' Period Dtype. Returns: Implementation. 
@@ -89,14 +88,19 @@ def implementation(self) -> Implementation: Examples: >>> import narwhals as nw >>> import pandas as pd + >>> s_native = pd.Series([1, 2, 3]) >>> s = nw.from_native(s_native, series_only=True) + >>> s.implementation + >>> s.implementation.is_pandas() True + >>> s.implementation.is_pandas_like() True + >>> s.implementation.is_polars() False """ @@ -125,16 +129,17 @@ def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self: A single element if `idx` is an integer, else a subset of the Series. Examples: + >>> from typing import Any >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> from typing import Any - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) - >>> s_pa = pa.chunked_array([s]) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: @@ -142,12 +147,15 @@ def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self: ... s = nw.from_native(s_native, series_only=True) ... 
return s[0] - We can then pass either pandas, Polars, or any supported library: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_get_first_item`: >>> agnostic_get_first_item(s_pd) np.int64(1) + >>> agnostic_get_first_item(s_pl) 1 + >>> agnostic_get_first_item(s_pa) 1 @@ -161,6 +169,7 @@ def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self: 0 1 1 2 dtype: int64 + >>> agnostic_slice(s_pl) # doctest:+NORMALIZE_WHITESPACE shape: (2,) Series: '' [i64] @@ -168,7 +177,8 @@ def __getitem__(self: Self, idx: int | slice | Sequence[int]) -> Any | Self: 1 2 ] - >>> agnostic_slice(s_pa) + + >>> agnostic_slice(s_pa) # doctest:+ELLIPSIS [ [ @@ -220,26 +230,31 @@ def to_native(self) -> IntoSeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_to_native(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_native`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_native(s_pd) 0 1 1 2 2 3 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_to_native(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -247,6 +262,16 @@ def to_native(self) -> IntoSeriesT: 2 3 ] + + >>> agnostic_to_native(s_pa) # doctest:+ELLIPSIS + + [ + [ + 1, + 2, + 3 + ] + ] """ return self._compliant_series._native_series # type: ignore[no-any-return] @@ -284,26 +309,31 @@ def scatter(self, indices: int | Sequence[int], values: Any) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT + >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) We define a library agnostic function: - >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT: + >>> def agnostic_scatter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... 
return df.with_columns(df["a"].scatter([0, 1], [999, 888])).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_scatter`: - >>> my_library_agnostic_function(df_pd) + >>> agnostic_scatter(df_pd) a b 0 999 4 1 888 5 2 3 6 - >>> my_library_agnostic_function(df_pl) + + >>> agnostic_scatter(df_pl) shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -314,6 +344,14 @@ def scatter(self, indices: int | Sequence[int], values: Any) -> Self: │ 888 ┆ 5 │ │ 3 ┆ 6 │ └─────┴─────┘ + + >>> agnostic_scatter(df_pa) + pyarrow.Table + a: int64 + b: int64 + ---- + a: [[999,888,3]] + b: [[4,5,6]] """ return self._from_compliant_series( self._compliant_series.scatter(indices, self._extract_native(values)) @@ -329,23 +367,31 @@ def shape(self) -> tuple[int]: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> tuple[int]: + >>> def agnostic_shape(s_native: IntoSeries) -> tuple[int]: ... s = nw.from_native(s_native, series_only=True) ... 
return s.shape - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_shape`: + + >>> agnostic_shape(s_pd) + (3,) - >>> my_library_agnostic_function(s_pd) + >>> agnostic_shape(s_pl) (3,) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_shape(s_pa) (3,) """ return self._compliant_series.shape # type: ignore[no-any-return] @@ -372,35 +418,48 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se Examples: >>> import polars as pl >>> import pandas as pd + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s_pd = pd.Series([1, 2, 3, 4]) - >>> s_pl = pl.Series([1, 2, 3, 4]) - Lets define a function to pipe into - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + + Let's define a function to pipe into: + + >>> def agnostic_pipe(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.pipe(lambda x: x + 2).to_native() - Now apply it to the series + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_pipe`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_pipe(s_pd) 0 3 1 4 2 5 - 3 6 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) + + >>> agnostic_pipe(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) Series: '' [i64] [ 3 4 5 - 6 ] - + >>> agnostic_pipe(s_pa) # doctest: +ELLIPSIS + + [ + [ + 3, + 4, + 5 + ] + ] """ return function(self, *args, **kwargs) @@ -419,25 +478,33 @@ def len(self) -> int: The number of elements in the Series. 
Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + >>> data = [1, 2, None] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function that computes the len of the series: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> int: + >>> def agnostic_len(s_native: IntoSeries) -> int: ... s = nw.from_native(s_native, series_only=True) ... return s.len() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_len`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_len(s_pd) 3 - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_len(s_pl) + 3 + + >>> agnostic_len(s_pa) 3 """ return len(self._compliant_series) @@ -452,23 +519,31 @@ def dtype(self: Self) -> DType: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> nw.dtypes.DType: + >>> def agnostic_dtype(s_native: IntoSeriesT) -> nw.dtypes.DType: ... s = nw.from_native(s_native, series_only=True) ... 
return s.dtype - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_dtype`: + + >>> agnostic_dtype(s_pd) + Int64 - >>> my_library_agnostic_function(s_pd) + >>> agnostic_dtype(s_pl) Int64 - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_dtype(s_pa) Int64 """ return self._compliant_series.dtype # type: ignore[no-any-return] @@ -483,23 +558,27 @@ def name(self) -> str: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="foo") - >>> s_pl = pl.Series("foo", s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="foo") + >>> s_pl = pl.Series("foo", data) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> str: + >>> def agnostic_name(s_native: IntoSeries) -> str: ... s = nw.from_native(s_native, series_only=True) ... return s.name - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas or Polars + to `agnostic_name`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_name(s_pd) 'foo' - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_name(s_pl) 'foo' """ return self._compliant_series.name # type: ignore[no-any-return] @@ -561,25 +640,27 @@ def ewm_mean( >>> import polars as pl >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [1, 2, 3] >>> s_pd = pd.Series(name="a", data=data) >>> s_pl = pl.Series(name="a", values=data) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_ewm_mean(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.ewm_mean(com=1, ignore_nulls=False).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas or Polars + to `agnostic_ewm_mean`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_ewm_mean(s_pd) 0 1.000000 1 1.666667 2 2.428571 Name: a, dtype: float64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_ewm_mean(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: 'a' [f64] [ @@ -612,26 +693,31 @@ def cast(self: Self, dtype: DType | type[DType]) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [True, False, True] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [True, False, True] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_cast(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.cast(nw.Int64).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cast`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_cast(s_pd) 0 1 1 0 2 1 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_cast(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -639,6 +725,16 @@ def cast(self: Self, dtype: DType | type[DType]) -> Self: 0 1 ] + + >>> agnostic_cast(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 0, + 1 + ] + ] """ _validate_dtype(dtype) return self._from_compliant_series(self._compliant_series.cast(dtype)) @@ -652,27 +748,32 @@ def to_frame(self) -> DataFrame[Any]: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries, IntoDataFrame - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="a") - >>> s_pl = pl.Series("a", s) + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2] + >>> s_pd = pd.Series(data, name="a") + >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> IntoDataFrame: + >>> def agnostic_to_frame(s_native: IntoSeries) -> IntoDataFrame: ... s = nw.from_native(s_native, series_only=True) ... 
return s.to_frame().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_frame`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_frame(s_pd) a 0 1 1 2 - 2 3 - >>> my_library_agnostic_function(s_pl) - shape: (3, 1) + + >>> agnostic_to_frame(s_pl) + shape: (2, 1) ┌─────┐ │ a │ │ --- │ @@ -680,8 +781,13 @@ def to_frame(self) -> DataFrame[Any]: ╞═════╡ │ 1 │ │ 2 │ - │ 3 │ └─────┘ + + >>> agnostic_to_frame(s_pa) + pyarrow.Table + : int64 + ---- + : [[1,2]] """ return self._dataframe( self._compliant_series.to_frame(), @@ -703,23 +809,31 @@ def to_list(self) -> list[Any]: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="a") - >>> s_pl = pl.Series("a", s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_to_list(s_native: IntoSeries): ... s = nw.from_native(s_native, series_only=True) ... 
return s.to_list() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_list`: + + >>> agnostic_to_list(s_pd) + [1, 2, 3] - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_list(s_pl) [1, 2, 3] - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_to_list(s_pa) [1, 2, 3] """ return self._compliant_series.to_list() # type: ignore[no-any-return] @@ -733,23 +847,31 @@ def mean(self) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_mean(s_native: IntoSeries) -> float: ... s = nw.from_native(s_native, series_only=True) ... return s.mean() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_mean`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_mean(s_pd) np.float64(2.0) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_mean(s_pl) + 2.0 + + >>> agnostic_mean(s_pa) 2.0 """ return self._compliant_series.mean() @@ -769,24 +891,28 @@ def median(self) -> Any: >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [5, 3, 8] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) - >>> s_pa = pa.chunked_array([s]) + + >>> data = [5, 3, 8] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_median(s_native: IntoSeries) -> float: ... 
s = nw.from_native(s_native, series_only=True) ... return s.median() - We can then pass any supported library such as pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_median`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_median(s_pd) np.float64(5.0) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_median(s_pl) 5.0 - >>> my_library_agnostic_function(s_pa) + + >>> agnostic_median(s_pa) 5.0 """ return self._compliant_series.median() @@ -802,22 +928,29 @@ def skew(self: Self) -> Any: >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw - >>> s = [1, 1, 2, 10, 100] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) - >>> s_pa = pa.array(s) + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 1, 2, 10, 100] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> @nw.narwhalify - ... def func(s): + >>> def agnostic_skew(s_native: IntoSeries) -> float: + ... s = nw.from_native(s_native, series_only=True) ... 
return s.skew() - We can pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_skew`: - >>> func(s_pd) + >>> agnostic_skew(s_pd) np.float64(1.4724267269058975) - >>> func(s_pl) + + >>> agnostic_skew(s_pl) + 1.4724267269058975 + + >>> agnostic_skew(s_pa) 1.4724267269058975 Notes: @@ -835,25 +968,32 @@ def count(self) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_count(s_native: IntoSeries) -> int: ... s = nw.from_native(s_native, series_only=True) ... return s.count() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_count`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_count(s_pd) np.int64(3) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_count(s_pl) 3 + >>> agnostic_count(s_pa) + 3 """ return self._compliant_series.count() @@ -869,23 +1009,31 @@ def any(self) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [False, True, False] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [False, True, False] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_any(s_native: IntoSeries) -> bool: ... 
s = nw.from_native(s_native, series_only=True) ... return s.any() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_any`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_any(s_pd) np.True_ - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_any(s_pl) + True + + >>> agnostic_any(s_pa) True """ return self._compliant_series.any() @@ -899,25 +1047,32 @@ def all(self) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [True, False, True] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [False, True, False] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_all(s_native: IntoSeries) -> bool: ... s = nw.from_native(s_native, series_only=True) ... return s.all() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_all`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_all(s_pd) np.False_ - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_all(s_pl) False + >>> agnostic_all(s_pa) + False """ return self._compliant_series.all() @@ -930,23 +1085,31 @@ def min(self) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_min(s_native: IntoSeries): ... 
s = nw.from_native(s_native, series_only=True) ... return s.min() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_min`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_min(s_pd) np.int64(1) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_min(s_pl) + 1 + + >>> agnostic_min(s_pa) 1 """ return self._compliant_series.min() @@ -960,23 +1123,31 @@ def max(self) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_max(s_native: IntoSeries): ... s = nw.from_native(s_native, series_only=True) ... return s.max() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_max`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_max(s_pd) np.int64(3) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_max(s_pl) + 3 + + >>> agnostic_max(s_pa) 3 """ return self._compliant_series.max() @@ -990,10 +1161,11 @@ def arg_min(self) -> int: >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) - >>> s_pa = pa.chunked_array([s]) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: @@ -1001,13 +1173,15 @@ def arg_min(self) -> int: ... s = nw.from_native(s_native, series_only=True) ... 
return s.arg_min() - We can then pass either any supported library such as pandas, Polars, - or PyArrow: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_arg_min`: >>> agnostic_arg_min(s_pd) np.int64(0) + >>> agnostic_arg_min(s_pl) 0 + >>> agnostic_arg_min(s_pa) 0 """ @@ -1022,10 +1196,11 @@ def arg_max(self) -> int: >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) - >>> s_pa = pa.chunked_array([s]) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: @@ -1033,13 +1208,15 @@ def arg_max(self) -> int: ... s = nw.from_native(s_native, series_only=True) ... return s.arg_max() - We can then pass either any supported library such as pandas, Polars, - or PyArrow: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_arg_max`: >>> agnostic_arg_max(s_pd) np.int64(2) + >>> agnostic_arg_max(s_pl) 2 + >>> agnostic_arg_max(s_pa) 2 """ @@ -1054,23 +1231,31 @@ def sum(self) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_sum(s_native: IntoSeries): ... s = nw.from_native(s_native, series_only=True) ... 
return s.sum() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sum`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_sum(s_pd) np.int64(6) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_sum(s_pl) + 6 + + >>> agnostic_sum(s_pa) 6 """ return self._compliant_series.sum() @@ -1088,23 +1273,31 @@ def std(self, *, ddof: int = 1) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_std(s_native: IntoSeries) -> float: ... s = nw.from_native(s_native, series_only=True) ... return s.std() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_std`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_std(s_pd) np.float64(1.0) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_std(s_pl) + 1.0 + + >>> agnostic_std(s_pa) 1.0 """ return self._compliant_series.std(ddof=ddof) @@ -1119,24 +1312,32 @@ def var(self, *, ddof: int = 1) -> Any: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def agnostic_var(s_native: IntoSeries): + >>> def agnostic_var(s_native: IntoSeries) -> float: ... 
s = nw.from_native(s_native, series_only=True) ... return s.var() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_var`: >>> agnostic_var(s_pd) np.float64(1.0) + >>> agnostic_var(s_pl) 1.0 + + >>> agnostic_var(s_pa) + 1.0 """ return self._compliant_series.var(ddof=ddof) @@ -1155,27 +1356,31 @@ def clip( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def clip_lower(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_clip_lower(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.clip(2).to_native() - We can then pass either pandas or Polars to `clip_lower`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_clip_lower`: - >>> clip_lower(s_pd) + >>> agnostic_clip_lower(s_pd) 0 2 1 2 2 3 dtype: int64 - >>> clip_lower(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_clip_lower(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -1184,20 +1389,32 @@ def clip( 3 ] + >>> agnostic_clip_lower(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 2, + 3 + ] + ] + We define another library agnostic function: - >>> def clip_upper(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_clip_upper(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.clip(upper_bound=2).to_native() - We can then pass either pandas or Polars to `clip_upper`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_clip_upper`: - >>> clip_upper(s_pd) + >>> agnostic_clip_upper(s_pd) 0 1 1 2 2 2 dtype: int64 - >>> clip_upper(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_clip_upper(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -1206,21 +1423,33 @@ def clip( 2 ] + >>> agnostic_clip_upper(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 2, + 2 + ] + ] + We can have both at the same time - >>> s = [-1, 1, -3, 3, -5, 5] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + >>> data = [-1, 1, -3, 3, -5, 5] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_clip(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.clip(-1, 3).to_native() - We can pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_clip`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_clip(s_pd) 0 -1 1 1 2 -1 @@ -1228,7 +1457,8 @@ def clip( 4 -1 5 3 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_clip(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (6,) Series: '' [i64] [ @@ -1239,6 +1469,19 @@ def clip( -1 3 ] + + >>> agnostic_clip(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + -1, + 1, + -1, + 3, + -1, + 3 + ] + ] """ return self._from_compliant_series( self._compliant_series.clip(lower_bound=lower_bound, upper_bound=upper_bound) @@ -1256,25 +1499,31 @@ def is_in(self, other: Any) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s_pd = pd.Series([1, 2, 3]) - >>> s_pl = pl.Series([1, 2, 3]) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_is_in(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ...
return s.is_in([3, 2, 8]).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_in`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_is_in(s_pd) 0 False 1 True 2 True dtype: bool - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_is_in(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [bool] [ @@ -1282,6 +1531,16 @@ def is_in(self, other: Any) -> Self: true true ] + + >>> agnostic_is_in(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + true + ] + ] """ return self._from_compliant_series( self._compliant_series.is_in(self._extract_native(other)) @@ -1296,31 +1555,45 @@ def arg_true(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [1, None, None, 2] - >>> s_pd = pd.Series(data, name="a") - >>> s_pl = pl.Series("a", data) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_arg_true(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.is_null().arg_true().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_arg_true`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_arg_true(s_pd) 1 1 2 2 - Name: a, dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + dtype: int64 + + >>> agnostic_arg_true(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) - Series: 'a' [u32] + Series: '' [u32] [ 1 2 ] + + >>> agnostic_arg_true(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 2 + ] + ] """ return self._from_compliant_series(self._compliant_series.arg_true()) @@ -1341,9 +1614,11 @@ def drop_nulls(self) -> Self: >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s_pd = pd.Series([2, 4, None, 3, 5]) - >>> s_pl = pl.Series([2, 4, None, 3, 5]) - >>> s_pa = pa.chunked_array([[2, 4, None, 3, 5]]) + + >>> data = [2, 4, None, 3, 5] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: @@ -1351,7 +1626,8 @@ def drop_nulls(self) -> Self: ... s = nw.from_native(s_native, series_only=True) ... 
return s.drop_nulls().to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_drop_nulls`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_drop_nulls`: >>> agnostic_drop_nulls(s_pd) 0 2.0 @@ -1359,6 +1635,7 @@ def drop_nulls(self) -> Self: 3 3.0 4 5.0 dtype: float64 + >>> agnostic_drop_nulls(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [i64] @@ -1368,6 +1645,7 @@ def drop_nulls(self) -> Self: 3 5 ] + >>> agnostic_drop_nulls(s_pa) # doctest: +ELLIPSIS [ @@ -1390,26 +1668,31 @@ def abs(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [2, -4, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [2, -4, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.abs().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_abs`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_abs(s_pd) 0 2 1 4 2 3 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_abs(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -1417,6 +1700,16 @@ def abs(self) -> Self: 4 3 ] + + >>> agnostic_abs(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 4, + 3 + ] + ] """ return self._from_compliant_series(self._compliant_series.abs()) @@ -1432,26 +1725,31 @@ def cum_sum(self: Self, *, reverse: bool = False) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [2, 4, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [2, 4, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_cum_sum(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.cum_sum().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_sum`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_cum_sum(s_pd) 0 2 1 6 2 9 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_cum_sum(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -1459,6 +1757,16 @@ def cum_sum(self: Self, *, reverse: bool = False) -> Self: 6 9 ] + + >>> agnostic_cum_sum(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 6, + 9 + ] + ] """ return self._from_compliant_series( self._compliant_series.cum_sum(reverse=reverse) @@ -1478,26 +1786,31 @@ def unique(self, *, maintain_order: bool = False) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [2, 4, 4, 6] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [2, 4, 4, 6] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_unique(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.unique(maintain_order=True).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_unique`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_unique(s_pd) 0 2 1 4 2 6 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_unique(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -1505,6 +1818,16 @@ def unique(self, *, maintain_order: bool = False) -> Self: 4 6 ] + + >>> agnostic_unique(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2, + 4, + 6 + ] + ] """ return self._from_compliant_series( self._compliant_series.unique(maintain_order=maintain_order) @@ -1528,26 +1851,31 @@ def diff(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [2, 4, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [2, 4, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_diff(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.diff().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_diff`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_diff(s_pd) 0 NaN 1 2.0 2 -1.0 dtype: float64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_diff(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -1555,6 +1883,16 @@ def diff(self) -> Self: 2 -1 ] + + >>> agnostic_diff(s_pa) # doctest: +ELLIPSIS + + [ + [ + null, + 2, + -1 + ] + ] """ return self._from_compliant_series(self._compliant_series.diff()) @@ -1580,26 +1918,31 @@ def shift(self, n: int) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [2, 4, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [2, 4, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_shift(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.shift(1).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_shift`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_shift(s_pd) 0 NaN 1 2.0 2 4.0 dtype: float64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_shift(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -1607,6 +1950,16 @@ def shift(self, n: int) -> Self: 2 4 ] + + >>> agnostic_shift(s_pa) # doctest: +ELLIPSIS + + [ + [ + null, + 2, + 4 + ] + ] """ return self._from_compliant_series(self._compliant_series.shift(n)) @@ -1636,29 +1989,34 @@ def sample( The results are not consistent across libraries. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT - >>> s_pd = pd.Series([1, 2, 3, 4]) - >>> s_pl = pl.Series([1, 2, 3, 4]) + >>> data = [1, 2, 3, 4] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_sample(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.sample(fraction=1.0, with_replacement=True).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sample`: - >>> my_library_agnostic_function(s_pd) # doctest: +SKIP + >>> agnostic_sample(s_pd) # doctest: +SKIP a 2 3 1 2 3 4 3 4 - >>> my_library_agnostic_function(s_pl) # doctest: +SKIP + + >>> agnostic_sample(s_pl) # doctest: +SKIP shape: (4,) Series: '' [i64] [ @@ -1667,6 +2025,17 @@ def sample( 3 4 ] + + >>> agnostic_sample(s_pa) # doctest: +SKIP + + [ + [ + 1, + 4, + 3, + 4 + ] + ] """ return self._from_compliant_series( self._compliant_series.sample( @@ -1708,25 +2077,28 @@ def alias(self, name: str) -> Self: >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="foo") - >>> s_pl = pl.Series("foo", s) - >>> s_pa = pa.chunked_array([s]) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="foo") + >>> s_pl = pl.Series("foo", data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_alias(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.alias("bar").to_native() - We can then pass any supported library such as pandas, Polars, or PyArrow: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_alias`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_alias(s_pd) 0 1 1 2 2 3 Name: bar, dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_alias(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: 'bar' [i64] [ @@ -1734,7 +2106,8 @@ def alias(self, name: str) -> Self: 2 3 ] - >>> my_library_agnostic_function(s_pa) # doctest: +ELLIPSIS + + >>> agnostic_alias(s_pa) # doctest: +ELLIPSIS [ [ @@ -1782,25 +2155,28 @@ def rename(self, name: str) -> Self: >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="foo") - >>> s_pl = pl.Series("foo", s) - >>> s_pa = pa.chunked_array([s]) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="foo") + >>> s_pl = pl.Series("foo", data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_rename(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ...
return s.rename("bar").to_native() - We can then pass any supported library such as pandas, Polars, or PyArrow: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rename`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_rename(s_pd) 0 1 1 2 2 3 Name: bar, dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_rename(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: 'bar' [i64] [ @@ -1808,7 +2184,8 @@ def rename(self, name: str) -> Self: 2 3 ] - >>> my_library_agnostic_function(s_pa) # doctest: +ELLIPSIS + + >>> agnostic_rename(s_pa) # doctest: +ELLIPSIS [ [ @@ -1844,32 +2221,36 @@ def replace_strict( A new Series with values replaced according to the mapping. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa - >>> df_pd = pd.DataFrame({"a": [3, 0, 1, 2]}) - >>> df_pl = pl.DataFrame({"a": [3, 0, 1, 2]}) - >>> df_pa = pa.table({"a": [3, 0, 1, 2]}) + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = {"a": [3, 0, 1, 2]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> df_pa = pa.table(data) Let's define dataframe-agnostic functions: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_replace_strict(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.replace_strict( ... [0, 1, 2, 3], ["zero", "one", "two", "three"], return_dtype=nw.String ... 
).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace_strict`: - >>> my_library_agnostic_function(df_pd["a"]) + >>> agnostic_replace_strict(df_pd["a"]) 0 three 1 zero 2 one 3 two Name: a, dtype: object - >>> my_library_agnostic_function(df_pl["a"]) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_replace_strict(df_pl["a"]) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: 'a' [str] [ @@ -1878,7 +2259,8 @@ def replace_strict( "one" "two" ] - >>> my_library_agnostic_function(df_pa["a"]) + + >>> agnostic_replace_strict(df_pa["a"]) [ [ @@ -1914,11 +2296,14 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [5, None, 1, 2] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [5, None, 1, 2] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define library agnostic functions: @@ -1930,7 +2315,8 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: ... s = nw.from_native(s_native, series_only=True) ... 
return s.sort(descending=True).to_native() - We can then pass either pandas or Polars to `agnostic_sort`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_sort` and `agnostic_sort_descending`: >>> agnostic_sort(s_pd) 1 NaN @@ -1938,6 +2324,7 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: 3 2.0 0 5.0 dtype: float64 + >>> agnostic_sort(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [i64] @@ -1947,12 +2334,25 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: 2 5 ] + + >>> agnostic_sort(s_pa) # doctest: +ELLIPSIS + + [ + [ + null, + 1, + 2, + 5 + ] + ] + >>> agnostic_sort_descending(s_pd) 1 NaN 0 5.0 3 2.0 2 1.0 dtype: float64 + >>> agnostic_sort_descending(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [i64] @@ -1962,6 +2362,17 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: 2 1 ] + + >>> agnostic_sort_descending(s_pa) # doctest: +ELLIPSIS + + [ + [ + null, + 5, + 2, + 1 + ] + ] """ return self._from_compliant_series( self._compliant_series.sort(descending=descending, nulls_last=nulls_last) @@ -1981,13 +2392,14 @@ def is_null(self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl - >>> import narwhals as nw >>> import pyarrow as pa + >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [1, 2, None] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) - >>> s_pa = pa.chunked_array([s]) + + >>> data = [1, 2, None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: @@ -1995,13 +2407,15 @@ def is_null(self) -> Self: ... s = nw.from_native(s_native, series_only=True) ... 
return s.is_null().to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_is_null`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_null`: >>> agnostic_is_null(s_pd) 0 False 1 False 2 True dtype: bool + >>> agnostic_is_null(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [bool] @@ -2010,6 +2424,7 @@ def is_null(self) -> Self: false true ] + >>> agnostic_is_null(s_pa) # doctest:+ELLIPSIS [ @@ -2049,10 +2464,11 @@ def fill_null( >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [1, 2, None] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) - >>> s_pa = pa.chunked_array([s]) + + >>> data = [1, 2, None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: @@ -2060,13 +2476,15 @@ def fill_null( ... s = nw.from_native(s_native, series_only=True) ... 
return s.fill_null(5).to_native() - We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_fill_null`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_fill_null`: >>> agnostic_fill_null(s_pd) 0 1.0 1 2.0 2 5.0 dtype: float64 + >>> agnostic_fill_null(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] @@ -2075,6 +2493,7 @@ def fill_null( 2 5 ] + >>> agnostic_fill_null(s_pa) # doctest:+ELLIPSIS [ @@ -2096,6 +2515,7 @@ def fill_null( 1 2.0 2 2.0 dtype: float64 + >>> agnostic_fill_null_with_strategy(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] @@ -2104,6 +2524,7 @@ def fill_null( 2 2 ] + >>> agnostic_fill_null_with_strategy(s_pa) # doctest:+ELLIPSIS [ @@ -2147,27 +2568,33 @@ def is_between( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s_pd = pd.Series([1, 2, 3, 4, 5]) - >>> s_pl = pl.Series([1, 2, 3, 4, 5]) + + >>> data = [1, 2, 3, 4, 5] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_is_between(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.is_between(2, 4, "right").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_between`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_is_between(s_pd) 0 False 1 False 2 True 3 True 4 False dtype: bool - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_is_between(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (5,) Series: '' [bool] [ @@ -2177,6 +2604,18 @@ def is_between( true false ] + + >>> agnostic_is_between(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + false, + false, + true, + true, + false + ] + ] """ return self._from_compliant_series( self._compliant_series.is_between( @@ -2195,23 +2634,31 @@ def n_unique(self) -> int: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 2, 3] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [1, 2, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_n_unique(s_native: IntoSeries) -> int: ... s = nw.from_native(s_native, series_only=True) ... return s.n_unique() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_n_unique`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_n_unique(s_pd) 3 - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_n_unique(s_pl) + 3 + + >>> agnostic_n_unique(s_pa) 3 """ return self._compliant_series.n_unique() # type: ignore[no-any-return] @@ -2223,26 +2670,34 @@ def to_numpy(self) -> np.ndarray: NumPy ndarray representation of the Series. 
Examples: + >>> import numpy as np >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> import numpy as np >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="a") - >>> s_pl = pl.Series("a", s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="a") + >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> np.ndarray: + >>> def agnostic_to_numpy(s_native: IntoSeries) -> np.ndarray: ... s = nw.from_native(s_native, series_only=True) ... return s.to_numpy() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_numpy`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_numpy(s_pd) array([1, 2, 3]...) - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_to_numpy(s_pl) + array([1, 2, 3]...) + + >>> agnostic_to_numpy(s_pa) array([1, 2, 3]...) """ return self._compliant_series.to_numpy() @@ -2256,30 +2711,41 @@ def to_pandas(self) -> pd.Series: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeries - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="a") - >>> s_pl = pl.Series("a", s) + + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data, name="a") + >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> pd.Series: + >>> def agnostic_to_pandas(s_native: IntoSeries) -> pd.Series: ... s = nw.from_native(s_native, series_only=True) ... 
return s.to_pandas() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_pandas`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_pandas(s_pd) 0 1 1 2 2 3 Name: a, dtype: int64 - >>> my_library_agnostic_function(s_pl) + + >>> agnostic_to_pandas(s_pl) 0 1 1 2 2 3 Name: a, dtype: int64 + + >>> agnostic_to_pandas(s_pa) + 0 1 + 1 2 + 2 3 + Name: , dtype: int64 """ return self._compliant_series.to_pandas() @@ -2416,26 +2882,31 @@ def filter(self, other: Any) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> s = [4, 10, 15, 34, 50] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) + + >>> data = [4, 10, 15, 34, 50] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_filter(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.filter(s > 10).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_filter`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_filter(s_pd) 2 15 3 34 4 50 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_filter(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -2443,6 +2914,16 @@ def filter(self, other: Any) -> Self: 34 50 ] + + >>> agnostic_filter(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 15, + 34, + 50 + ] + ] """ return self._from_compliant_series( self._compliant_series.filter(self._extract_native(other)) @@ -2456,28 +2937,34 @@ def is_duplicated(self: Self) -> Self: A new Series with boolean values indicating duplicated rows. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl - >>> s_pd = pd.Series([1, 2, 3, 1]) - >>> s_pl = pl.Series([1, 2, 3, 1]) + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 1] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_is_duplicated(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.is_duplicated().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_duplicated`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_is_duplicated(s_pd) 0 True 1 False 2 False 3 True dtype: bool - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_is_duplicated(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [bool] [ @@ -2486,6 +2973,17 @@ def is_duplicated(self: Self) -> Self: false true ] + + >>> agnostic_is_duplicated(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + true, + false, + false, + true + ] + ] """ return self._from_compliant_series(self._compliant_series.is_duplicated()) @@ -2496,29 +2994,35 @@ def is_empty(self: Self) -> bool: A boolean indicating if the series is empty. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries Let's define a dataframe-agnostic function that filters rows in which "foo" values are greater than 10, and then checks if the result is empty or not: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_is_empty(s_native: IntoSeries) -> bool: ... s = nw.from_native(s_native, series_only=True) ... 
return s.filter(s > 10).is_empty() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_empty`: - >>> s_pd = pd.Series([1, 2, 3]) - >>> s_pl = pl.Series([1, 2, 3]) - >>> my_library_agnostic_function(s_pd), my_library_agnostic_function(s_pl) - (True, True) + >>> data = [1, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + >>> agnostic_is_empty(s_pd), agnostic_is_empty(s_pl), agnostic_is_empty(s_pa) + (True, True, True) - >>> s_pd = pd.Series([100, 2, 3]) - >>> s_pl = pl.Series([100, 2, 3]) - >>> my_library_agnostic_function(s_pd), my_library_agnostic_function(s_pl) - (False, False) + >>> data = [100, 2, 3] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) + >>> agnostic_is_empty(s_pd), agnostic_is_empty(s_pl), agnostic_is_empty(s_pa) + (False, False, False) """ return self._compliant_series.is_empty() # type: ignore[no-any-return] @@ -2529,29 +3033,34 @@ def is_unique(self: Self) -> Self: A new Series with boolean values indicating unique rows. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl - >>> s_pd = pd.Series([1, 2, 3, 1]) - >>> s_pl = pl.Series([1, 2, 3, 1]) + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 1] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_is_unique(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.is_unique().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_unique`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_is_unique(s_pd) 0 False 1 True 2 True 3 False dtype: bool - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_is_unique(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [bool] [ @@ -2560,6 +3069,16 @@ def is_unique(self: Self) -> Self: true false ] + >>> agnostic_is_unique(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + true, + false + ] + ] """ return self._from_compliant_series(self._compliant_series.is_unique()) @@ -2575,29 +3094,33 @@ def null_count(self: Self) -> int: The number of null values in the Series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa - >>> s = [1, None, None] - >>> s_pd = pd.Series(s) - >>> s_pl = pl.Series(s) - >>> s_pa = pa.chunked_array([s]) + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + + >>> data = [1, None, None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function that returns the null count of the series: - >>> def agnostic_null_count(s_native: IntoSeries): + >>> def agnostic_null_count(s_native: IntoSeries) -> int: ... s = nw.from_native(s_native, series_only=True) ... 
return s.null_count() - We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_null_count`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_null_count`: >>> agnostic_null_count(s_pd) np.int64(2) + >>> agnostic_null_count(s_pl) 2 + >>> agnostic_null_count(s_pa) 2 """ @@ -2610,22 +3133,27 @@ def is_first_distinct(self: Self) -> Self: A new Series with boolean values indicating the first occurrence of each distinct value. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl - >>> s_pd = pd.Series([1, 1, 2, 3, 2]) - >>> s_pl = pl.Series([1, 1, 2, 3, 2]) + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 1, 2, 3, 2] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_is_first_distinct(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.is_first_distinct().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_first_distinct`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_is_first_distinct(s_pd) 0 True 1 False 2 True @@ -2633,7 +3161,7 @@ def is_first_distinct(self: Self) -> Self: 4 False dtype: bool - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_is_first_distinct(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (5,) Series: '' [bool] [ @@ -2643,6 +3171,18 @@ def is_first_distinct(self: Self) -> Self: true false ] + + >>> agnostic_is_first_distinct(s_pa) # doctest: +ELLIPSIS + + [ + [ + true, + false, + true, + true, + false + ] + ] """ return self._from_compliant_series(self._compliant_series.is_first_distinct()) @@ -2653,22 +3193,27 @@ def is_last_distinct(self: Self) -> Self: A new Series with boolean values indicating the last occurrence of each distinct value. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl - >>> s_pd = pd.Series([1, 1, 2, 3, 2]) - >>> s_pl = pl.Series([1, 1, 2, 3, 2]) + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 1, 2, 3, 2] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_is_last_distinct(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.is_last_distinct().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_last_distinct`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_is_last_distinct(s_pd) 0 False 1 True 2 False @@ -2676,7 +3221,7 @@ def is_last_distinct(self: Self) -> Self: 4 True dtype: bool - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_is_last_distinct(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (5,) Series: '' [bool] [ @@ -2686,6 +3231,18 @@ def is_last_distinct(self: Self) -> Self: true true ] + + >>> agnostic_is_last_distinct(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + false, + true, + true + ] + ] """ return self._from_compliant_series(self._compliant_series.is_last_distinct()) @@ -2699,30 +3256,40 @@ def is_sorted(self: Self, *, descending: bool = False) -> bool: A boolean indicating if the Series is sorted. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + >>> unsorted_data = [1, 3, 2] >>> sorted_data = [3, 2, 1] Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function( - ... s_native: IntoSeries, descending: bool = False - ... ): + >>> def agnostic_is_sorted(s_native: IntoSeries, descending: bool = False): ... s = nw.from_native(s_native, series_only=True) ... 
return s.is_sorted(descending=descending) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_sorted`: + + >>> agnostic_is_sorted(pd.Series(unsorted_data)) + False + + >>> agnostic_is_sorted(pd.Series(sorted_data), descending=True) + True - >>> my_library_agnostic_function(pl.Series(unsorted_data)) + >>> agnostic_is_sorted(pl.Series(unsorted_data)) False - >>> my_library_agnostic_function(pl.Series(sorted_data), descending=True) + + >>> agnostic_is_sorted(pl.Series(sorted_data), descending=True) True - >>> my_library_agnostic_function(pd.Series(unsorted_data)) + + >>> agnostic_is_sorted(pa.chunked_array([unsorted_data])) False - >>> my_library_agnostic_function(pd.Series(sorted_data), descending=True) + + >>> agnostic_is_sorted(pa.chunked_array([sorted_data]), descending=True) True """ return self._compliant_series.is_sorted(descending=descending) # type: ignore[no-any-return] @@ -2751,28 +3318,34 @@ def value_counts( - Either count or proportion as second column, depending on normalize parameter. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries, IntoDataFrame >>> import pandas as pd >>> import polars as pl - >>> s_pd = pd.Series([1, 1, 2, 3, 2], name="s") - >>> s_pl = pl.Series(values=[1, 1, 2, 3, 2], name="s") + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 1, 2, 3, 2] + >>> s_pd = pd.Series(data, name="s") + >>> s_pl = pl.Series(values=data, name="s") + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> IntoDataFrame: + >>> def agnostic_value_counts(s_native: IntoSeries) -> IntoDataFrame: ... s = nw.from_native(s_native, series_only=True) ... 
return s.value_counts(sort=True).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_value_counts`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_value_counts(s_pd) s count 0 1 2 1 2 2 2 3 1 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_value_counts(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3, 2) ┌─────┬───────┐ │ s ┆ count │ @@ -2783,6 +3356,14 @@ def value_counts( │ 2 ┆ 2 │ │ 3 ┆ 1 │ └─────┴───────┘ + + >>> agnostic_value_counts(s_pa) + pyarrow.Table + : int64 + count: int64 + ---- + : [[1,2,3]] + count: [[2,2,1]] """ return self._dataframe( self._compliant_series.value_counts( @@ -2809,30 +3390,37 @@ def quantile( The quantile value. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + >>> data = list(range(50)) >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries): + >>> def agnostic_quantile(s_native: IntoSeries) -> list[float]: ... s = nw.from_native(s_native, series_only=True) ... return [ ... s.quantile(quantile=q, interpolation="nearest") ... for q in (0.1, 0.25, 0.5, 0.75, 0.9) ... 
] - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_quantile`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_quantile(s_pd) [np.int64(5), np.int64(12), np.int64(24), np.int64(37), np.int64(44)] - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_quantile(s_pl) [5.0, 12.0, 25.0, 37.0, 44.0] + + >>> agnostic_quantile(s_pa) + [5, 12, 24, 37, 44] """ return self._compliant_series.quantile( quantile=quantile, interpolation=interpolation @@ -2852,20 +3440,19 @@ def zip_with(self: Self, mask: Self, other: Self) -> Self: A new Series with values selected from self or other based on the mask. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl - >>> s1_pl = pl.Series([1, 2, 3, 4, 5]) - >>> s2_pl = pl.Series([5, 4, 3, 2, 1]) - >>> mask_pl = pl.Series([True, False, True, False, True]) - >>> s1_pd = pd.Series([1, 2, 3, 4, 5]) - >>> s2_pd = pd.Series([5, 4, 3, 2, 1]) - >>> mask_pd = pd.Series([True, False, True, False, True]) + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + + >>> data = [1, 2, 3, 4, 5] + >>> other = [5, 4, 3, 2, 1] + >>> mask = [True, False, True, False, True] Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function( + >>> def agnostic_zip_with( ... s1_native: IntoSeriesT, mask_native: IntoSeriesT, s2_native: IntoSeriesT ... ) -> IntoSeriesT: ... s1 = nw.from_native(s1_native, series_only=True) @@ -2873,10 +3460,13 @@ def zip_with(self: Self, mask: Self, other: Self) -> Self: ... s2 = nw.from_native(s2_native, series_only=True) ... 
return s1.zip_with(mask, s2).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_zip_with`: - >>> my_library_agnostic_function( - ... s1_pl, mask_pl, s2_pl + >>> agnostic_zip_with( + ... s1_native=pl.Series(data), + ... mask_native=pl.Series(mask), + ... s2_native=pl.Series(other), ... ) # doctest: +NORMALIZE_WHITESPACE shape: (5,) Series: '' [i64] @@ -2887,13 +3477,34 @@ def zip_with(self: Self, mask: Self, other: Self) -> Self: 2 5 ] - >>> my_library_agnostic_function(s1_pd, mask_pd, s2_pd) + + >>> agnostic_zip_with( + ... s1_native=pd.Series(data), + ... mask_native=pd.Series(mask), + ... s2_native=pd.Series(other), + ... ) 0 1 1 4 2 3 3 2 4 5 dtype: int64 + + >>> agnostic_zip_with( + ... s1_native=pa.chunked_array([data]), + ... mask_native=pa.chunked_array([mask]), + ... s2_native=pa.chunked_array([other]), + ... ) # doctest: +ELLIPSIS + + [ + [ + 1, + 4, + 3, + 2, + 5 + ] + ] """ return self._from_compliant_series( self._compliant_series.zip_with( @@ -2911,30 +3522,34 @@ def item(self: Self, index: int | None = None) -> Any: The scalar value of the Series or the element at the given index. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries Let's define a dataframe-agnostic function that returns item at given index - >>> def my_library_agnostic_function(s_native: IntoSeries, index=None): + >>> def agnostic_item(s_native: IntoSeries, index=None): ... s = nw.from_native(s_native, series_only=True) ... return s.item(index) - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_item`: >>> ( - ... my_library_agnostic_function(pl.Series("a", [1]), None), - ... 
my_library_agnostic_function(pd.Series([1]), None), + ... agnostic_item(pl.Series("a", [1]), None), + ... agnostic_item(pd.Series([1]), None), + ... agnostic_item(pa.chunked_array([[1]]), None), ... ) - (1, np.int64(1)) + (1, np.int64(1), 1) >>> ( - ... my_library_agnostic_function(pl.Series("a", [9, 8, 7]), -1), - ... my_library_agnostic_function(pl.Series([9, 8, 7]), -2), + ... agnostic_item(pl.Series("a", [9, 8, 7]), -1), + ... agnostic_item(pl.Series([9, 8, 7]), -2), + ... agnostic_item(pa.chunked_array([[9, 8, 7]]), -3), ... ) - (7, 8) + (7, 8, 9) """ return self._compliant_series.item(index=index) @@ -2948,29 +3563,33 @@ def head(self: Self, n: int = 10) -> Self: A new Series containing the first n characters of each string. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = list(range(10)) >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function that returns the first 3 rows: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_head(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.head(3).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_head`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_head(s_pd) 0 0 1 1 2 2 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_head(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -2978,6 +3597,16 @@ def head(self: Self, n: int = 10) -> Self: 1 2 ] + + >>> agnostic_head(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 0, + 1, + 2 + ] + ] """ return self._from_compliant_series(self._compliant_series.head(n)) @@ -2991,28 +3620,33 @@ def tail(self: Self, n: int = 10) -> Self: A new Series with the last n rows. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = list(range(10)) >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function that returns the last 3 rows: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_tail(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.tail(3).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_tail`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_tail(s_pd) 7 7 8 8 9 9 dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_tail(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -3020,6 +3654,16 @@ def tail(self: Self, n: int = 10) -> Self: 8 9 ] + + >>> agnostic_tail(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 7, + 8, + 9 + ] + ] """ return self._from_compliant_series(self._compliant_series.tail(n)) @@ -3041,29 +3685,33 @@ def round(self: Self, decimals: int = 0) -> Self: Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..). Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.12345, 2.56789, 3.901234] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function that rounds to the first decimal: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_round(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.round(1).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_round`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_round(s_pd) 0 1.1 1 2.6 2 3.9 dtype: float64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_round(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [f64] [ @@ -3071,6 +3719,16 @@ def round(self: Self, decimals: int = 0) -> Self: 2.6 3.9 ] + + >>> agnostic_round(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1.1, + 2.6, + 3.9 + ] + ] """ return self._from_compliant_series(self._compliant_series.round(decimals)) @@ -3091,37 +3749,42 @@ def to_dummies( between NaN and Null, whereas pandas doesn't. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries, IntoDataFrame >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + >>> data = [1, 2, 3] >>> s_pd = pd.Series(data, name="a") >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) - Let's define a dataframe-agnostic function that rounds to the first decimal: + Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function( + >>> def agnostic_to_dummies( ... s_native: IntoSeries, drop_first: bool = False ... ) -> IntoDataFrame: ... s = nw.from_native(s_native, series_only=True) ... 
return s.to_dummies(drop_first=drop_first).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_dummies`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_dummies(s_pd) a_1 a_2 a_3 0 1 0 0 1 0 1 0 2 0 0 1 - >>> my_library_agnostic_function(s_pd, drop_first=True) + >>> agnostic_to_dummies(s_pd, drop_first=True) a_2 a_3 0 0 0 1 1 0 2 0 1 - >>> my_library_agnostic_function(s_pl) + >>> agnostic_to_dummies(s_pl) shape: (3, 3) ┌─────┬─────┬─────┐ │ a_1 ┆ a_2 ┆ a_3 │ @@ -3132,7 +3795,8 @@ def to_dummies( │ 0 ┆ 1 ┆ 0 │ │ 0 ┆ 0 ┆ 1 │ └─────┴─────┴─────┘ - >>> my_library_agnostic_function(s_pl, drop_first=True) + + >>> agnostic_to_dummies(s_pl, drop_first=True) shape: (3, 2) ┌─────┬─────┐ │ a_2 ┆ a_3 │ @@ -3143,6 +3807,23 @@ def to_dummies( │ 1 ┆ 0 │ │ 0 ┆ 1 │ └─────┴─────┘ + + >>> agnostic_to_dummies(s_pa) + pyarrow.Table + _1: int8 + _2: int8 + _3: int8 + ---- + _1: [[1,0,0]] + _2: [[0,1,0]] + _3: [[0,0,1]] + >>> agnostic_to_dummies(s_pa, drop_first=True) + pyarrow.Table + _2: int8 + _3: int8 + ---- + _2: [[0,1,0]] + _3: [[0,0,1]] """ return self._dataframe( self._compliant_series.to_dummies(separator=separator, drop_first=drop_first), @@ -3160,33 +3841,48 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: A new Series with every nth value starting from the offset. 
Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1, 2, 3, 4] - >>> s_pd = pd.Series(name="a", data=data) - >>> s_pl = pl.Series(name="a", values=data) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function in which gather every 2 rows, starting from a offset of 1: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_gather_every(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.gather_every(n=2, offset=1).to_native() - >>> my_library_agnostic_function(s_pd) + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_gather_every`: + + >>> agnostic_gather_every(s_pd) 1 2 3 4 - Name: a, dtype: int64 + dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest:+NORMALIZE_WHITESPACE + >>> agnostic_gather_every(s_pl) # doctest:+NORMALIZE_WHITESPACE shape: (2,) - Series: 'a' [i64] + Series: '' [i64] [ 2 4 ] + + >>> agnostic_gather_every(s_pa) # doctest:+ELLIPSIS + + [ + [ + 2, + 4 + ] + ] """ return self._from_compliant_series( self._compliant_series.gather_every(n=n, offset=offset) @@ -3199,22 +3895,36 @@ def to_arrow(self: Self) -> pa.Array: A PyArrow Array containing the data from the Series. 
Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries - >>> import pyarrow as pa >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeries + >>> data = [1, 2, 3, 4] - >>> s_pd = pd.Series(name="a", data=data) - >>> s_pl = pl.Series(name="a", values=data) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function that converts to arrow: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> pa.Array: + >>> def agnostic_to_arrow(s_native: IntoSeries) -> pa.Array: ... s = nw.from_native(s_native, series_only=True) ... return s.to_arrow() - >>> my_library_agnostic_function(s_pd) # doctest:+NORMALIZE_WHITESPACE + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_arrow`: + + >>> agnostic_to_arrow(s_pd) # doctest:+NORMALIZE_WHITESPACE + + [ + 1, + 2, + 3, + 4 + ] + + >>> agnostic_to_arrow(s_pl) # doctest:+NORMALIZE_WHITESPACE [ 1, @@ -3223,7 +3933,7 @@ def to_arrow(self: Self) -> pa.Array: 4 ] - >>> my_library_agnostic_function(s_pl) # doctest:+NORMALIZE_WHITESPACE + >>> agnostic_to_arrow(s_pa) # doctest:+NORMALIZE_WHITESPACE [ 1, @@ -3245,33 +3955,45 @@ def mode(self: Self) -> Self: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT >>> data = [1, 1, 2, 2, 3] - >>> s_pd = pd.Series(name="a", data=data) - >>> s_pl = pl.Series(name="a", values=data) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_mode(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.mode().sort().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_mode`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_mode(s_pd) 0 1 1 2 - Name: a, dtype: int64 + dtype: int64 - >>> my_library_agnostic_function(s_pl) # doctest:+NORMALIZE_WHITESPACE + >>> agnostic_mode(s_pl) # doctest:+NORMALIZE_WHITESPACE shape: (2,) - Series: 'a' [i64] + Series: '' [i64] [ 1 2 ] + + >>> agnostic_mode(s_pa) # doctest:+ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 1, + 2 + ] + ] """ return self._from_compliant_series(self._compliant_series.mode()) @@ -3287,31 +4009,31 @@ def is_finite(self: Self) -> Self: Expression of `Boolean` data type. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [float("nan"), float("inf"), 2.0, None] We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_is_finite(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.is_finite().to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_is_finite`: - >>> my_library_agnostic_function(pd.Series(data)) + >>> agnostic_is_finite(pd.Series(data)) 0 False 1 False 2 True 3 False dtype: bool - >>> my_library_agnostic_function( - ... pl.Series(data) - ... 
) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_is_finite(pl.Series(data)) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [bool] [ @@ -3321,7 +4043,7 @@ def is_finite(self: Self) -> Self: null ] - >>> my_library_agnostic_function(pa.chunked_array([data])) # doctest: +ELLIPSIS + >>> agnostic_is_finite(pa.chunked_array([data])) # doctest: +ELLIPSIS [ [ @@ -3344,28 +4066,31 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self: A new Series with the cumulative count of non-null values. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = ["x", "k", None, "d"] We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_cum_count(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.cum_count(reverse=True).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_count`: - >>> my_library_agnostic_function(pd.Series(data)) + >>> agnostic_cum_count(pd.Series(data)) 0 3 1 2 2 1 3 1 dtype: int64 - >>> my_library_agnostic_function(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + + >>> agnostic_cum_count(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE shape: (4,) Series: '' [u32] [ @@ -3374,7 +4099,8 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self: 1 1 ] - >>> my_library_agnostic_function(pa.chunked_array([data])) # doctest:+ELLIPSIS + + >>> agnostic_cum_count(pa.chunked_array([data])) # doctest:+ELLIPSIS [ [ @@ -3400,28 +4126,31 @@ def cum_min(self: Self, *, reverse: bool = False) -> Self: A new Series with the cumulative min of non-null values. 
Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [3, 1, None, 2] We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_cum_min(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.cum_min().to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_min`: - >>> my_library_agnostic_function(pd.Series(data)) + >>> agnostic_cum_min(pd.Series(data)) 0 3.0 1 1.0 2 NaN 3 1.0 dtype: float64 - >>> my_library_agnostic_function(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + + >>> agnostic_cum_min(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE shape: (4,) Series: '' [i64] [ @@ -3430,7 +4159,8 @@ def cum_min(self: Self, *, reverse: bool = False) -> Self: null 1 ] - >>> my_library_agnostic_function(pa.chunked_array([data])) # doctest:+ELLIPSIS + + >>> agnostic_cum_min(pa.chunked_array([data])) # doctest:+ELLIPSIS [ [ @@ -3456,28 +4186,31 @@ def cum_max(self: Self, *, reverse: bool = False) -> Self: A new Series with the cumulative max of non-null values. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1, 3, None, 2] We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_cum_max(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.cum_max().to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_max`: - >>> my_library_agnostic_function(pd.Series(data)) + >>> agnostic_cum_max(pd.Series(data)) 0 1.0 1 3.0 2 NaN 3 3.0 dtype: float64 - >>> my_library_agnostic_function(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + + >>> agnostic_cum_max(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE shape: (4,) Series: '' [i64] [ @@ -3486,7 +4219,8 @@ def cum_max(self: Self, *, reverse: bool = False) -> Self: null 3 ] - >>> my_library_agnostic_function(pa.chunked_array([data])) # doctest:+ELLIPSIS + + >>> agnostic_cum_max(pa.chunked_array([data])) # doctest:+ELLIPSIS [ [ @@ -3512,28 +4246,31 @@ def cum_prod(self: Self, *, reverse: bool = False) -> Self: A new Series with the cumulative product of non-null values. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1, 3, None, 2] We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_cum_prod(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.cum_prod().to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_cum_prod`: - >>> my_library_agnostic_function(pd.Series(data)) + >>> agnostic_cum_prod(pd.Series(data)) 0 1.0 1 3.0 2 NaN 3 6.0 dtype: float64 - >>> my_library_agnostic_function(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE + + >>> agnostic_cum_prod(pl.Series(data)) # doctest:+NORMALIZE_WHITESPACE shape: (4,) Series: '' [i64] [ @@ -3542,7 +4279,8 @@ def cum_prod(self: Self, *, reverse: bool = False) -> Self: null 6 ] - >>> my_library_agnostic_function(pa.chunked_array([data])) # doctest:+ELLIPSIS + + >>> agnostic_cum_prod(pa.chunked_array([data])) # doctest:+ELLIPSIS [ [ @@ -3590,11 +4328,12 @@ def rolling_sum( A new series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.0, 2.0, 3.0, 4.0] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -3606,7 +4345,8 @@ def rolling_sum( ... s = nw.from_native(s_native, series_only=True) ... return s.rolling_sum(window_size=2).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_sum`: >>> agnostic_rolling_sum(s_pd) 0 NaN @@ -3683,11 +4423,12 @@ def rolling_mean( A new series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.0, 2.0, 3.0, 4.0] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -3699,7 +4440,8 @@ def rolling_mean( ... 
s = nw.from_native(s_native, series_only=True) ... return s.rolling_mean(window_size=2).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_mean`: >>> agnostic_rolling_mean(s_pd) 0 NaN @@ -3778,11 +4520,12 @@ def rolling_var( A new series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.0, 3.0, 1.0, 4.0] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -3794,7 +4537,8 @@ def rolling_var( ... s = nw.from_native(s_native, series_only=True) ... return s.rolling_var(window_size=2, min_periods=1).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_var`: >>> agnostic_rolling_var(s_pd) 0 NaN @@ -3871,11 +4615,12 @@ def rolling_std( A new series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.0, 3.0, 1.0, 4.0] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -3887,7 +4632,8 @@ def rolling_std( ... s = nw.from_native(s_native, series_only=True) ... 
return s.rolling_std(window_size=2, min_periods=1).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_std`: >>> agnostic_rolling_std(s_pd) 0 NaN @@ -3971,32 +4717,46 @@ def get_categories(self: Self) -> SeriesT: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["apple", "mango", "mango"] >>> s_pd = pd.Series(data, dtype="category") >>> s_pl = pl.Series(data, dtype=pl.Categorical) + >>> s_pa = pa.chunked_array([data]).dictionary_encode() We define a dataframe-agnostic function to get unique categories from column 'fruits': - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_get_categories(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.cat.get_categories().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_get_categories`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_get_categories(s_pd) 0 apple 1 mango dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_get_categories(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [str] [ "apple" "mango" ] + + >>> agnostic_get_categories(s_pa) # doctest: +ELLIPSIS + + [ + [ + "apple", + "mango" + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.cat.get_categories() @@ -4016,21 +4776,25 @@ def len_chars(self: Self) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["foo", "Café", "345", "東京", None] >>> s_pd = 
pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_len_chars(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.str.len_chars().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_len_chars`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_len_chars(s_pd) 0 3.0 1 4.0 2 3.0 @@ -4038,7 +4802,7 @@ def len_chars(self: Self) -> SeriesT: 4 NaN dtype: float64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_len_chars(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (5,) Series: '' [u32] [ @@ -4048,6 +4812,18 @@ def len_chars(self: Self) -> SeriesT: 2 null ] + + >>> agnostic_len_chars(s_pa) # doctest: +ELLIPSIS + + [ + [ + 3, + 4, + 3, + 2, + null + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.len_chars() @@ -4070,33 +4846,46 @@ def replace( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["123abc", "abc abc123"] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_replace(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... s = s.str.replace("abc", "") ... 
return s.to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_replace(s_pd) 0 123 1 abc123 dtype: object - >>> my_library_agnostic_function(s_pl) # doctest:+NORMALIZE_WHITESPACE + >>> agnostic_replace(s_pl) # doctest:+NORMALIZE_WHITESPACE shape: (2,) Series: '' [str] [ "123" " abc123" ] + + >>> agnostic_replace(s_pa) # doctest: +ELLIPSIS + + [ + [ + "123", + " abc123" + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.replace( @@ -4120,33 +4909,46 @@ def replace_all( Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["123abc", "abc abc123"] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_replace_all(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... s = s.str.replace_all("abc", "") ... 
return s.to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace_all`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_replace_all(s_pd) 0 123 1 123 dtype: object - >>> my_library_agnostic_function(s_pl) # doctest:+NORMALIZE_WHITESPACE + >>> agnostic_replace_all(s_pl) # doctest:+NORMALIZE_WHITESPACE shape: (2,) Series: '' [str] [ "123" " 123" ] + + >>> agnostic_replace_all(s_pa) # doctest: +ELLIPSIS + + [ + [ + "123", + " 123" + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.replace_all( @@ -4166,33 +4968,46 @@ def strip_chars(self: Self, characters: str | None = None) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["apple", "\nmango"] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_strip_chars(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... s = s.str.strip_chars() ... 
return s.to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_strip_chars`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_strip_chars(s_pd) 0 apple 1 mango dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_strip_chars(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [str] [ "apple" "mango" ] + + >>> agnostic_strip_chars(s_pa) # doctest: +ELLIPSIS + + [ + [ + "apple", + "mango" + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.strip_chars(characters) @@ -4210,27 +5025,31 @@ def starts_with(self: Self, prefix: str) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["apple", "mango", None] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_starts_with(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.str.starts_with("app").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_starts_with`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_starts_with(s_pd) 0 True 1 False 2 None dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_starts_with(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [bool] [ @@ -4238,6 +5057,16 @@ def starts_with(self: Self, prefix: str) -> SeriesT: false null ] + + >>> agnostic_starts_with(s_pa) # doctest: +ELLIPSIS + + [ + [ + true, + false, + null + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.starts_with(prefix) @@ -4255,27 +5084,31 @@ def ends_with(self: Self, suffix: str) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["apple", "mango", None] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_ends_with(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.str.ends_with("ngo").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_ends_with`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_ends_with(s_pd) 0 False 1 True 2 None dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_ends_with(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [bool] [ @@ -4283,6 +5116,16 @@ def ends_with(self: Self, suffix: str) -> SeriesT: true null ] + + >>> agnostic_ends_with(s_pa) # doctest: +ELLIPSIS + + [ + [ + false, + true, + null + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.ends_with(suffix) @@ -4305,10 +5148,11 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> SeriesT: >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> pets = ["cat", "dog", "rabbit and parrot", "dove", None] - >>> s_pd = pd.Series(pets) - >>> s_pl = pl.Series(pets) - >>> s_pa = pa.chunked_array([pets]) + + >>> data = ["cat", "dog", "rabbit and parrot", "dove", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: @@ -4367,28 +5211,32 @@ def slice(self: Self, offset: int, length: int | None = None) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["pear", None, "papaya", "dragonfruit"] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.str.slice(4, length=3).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_slice`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_slice(s_pd) # doctest: +NORMALIZE_WHITESPACE 0 1 None 2 ya 3 onf dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_slice(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [str] [ @@ -4398,20 +5246,31 @@ def slice(self: Self, offset: int, length: int | None = None) -> SeriesT: "onf" ] + >>> agnostic_slice(s_pa) # doctest: +ELLIPSIS + + [ + [ + "", + null, + "ya", + "onf" + ] + ] + Using negative indexes: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.str.slice(-3).to_native() - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_slice(s_pd) # doctest: +NORMALIZE_WHITESPACE 0 ear 1 None 2 aya 3 uit dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_slice(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [str] [ @@ -4420,6 +5279,17 @@ def slice(self: Self, offset: int, length: int | None = None) -> SeriesT: "aya" "uit" ] + + >>> agnostic_slice(s_pa) # doctest: +ELLIPSIS + + [ + [ + "ear", + null, + "aya", + "uit" + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.slice( @@ -4444,27 +5314,32 @@ def head(self: Self, n: int = 5) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> lyrics = ["Atatata", "taata", "taatatata", "zukkyun"] - >>> s_pd = pd.Series(lyrics) - >>> s_pl = pl.Series(lyrics) + + 
>>> data = ["Atatata", "taata", "taatatata", "zukkyun"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_head(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.str.head().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_head`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_head(s_pd) 0 Atata 1 taata 2 taata 3 zukky dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_head(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [str] [ @@ -4473,6 +5348,17 @@ def head(self: Self, n: int = 5) -> SeriesT: "taata" "zukky" ] + + >>> agnostic_head(s_pa) # doctest: +ELLIPSIS + + [ + [ + "Atata", + "taata", + "taata", + "zukky" + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.slice(offset=0, length=n) @@ -4495,27 +5381,32 @@ def tail(self: Self, n: int = 5) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT - >>> lyrics = ["Atatata", "taata", "taatatata", "zukkyun"] - >>> s_pd = pd.Series(lyrics) - >>> s_pl = pl.Series(lyrics) + + >>> data = ["Atatata", "taata", "taatatata", "zukkyun"] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_tail(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.str.tail().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_tail`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_tail(s_pd) 0 atata 1 taata 2 atata 3 kkyun dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_tail(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (4,) Series: '' [str] [ @@ -4524,6 +5415,17 @@ def tail(self: Self, n: int = 5) -> SeriesT: "atata" "kkyun" ] + + >>> agnostic_tail(s_pa) # doctest: +ELLIPSIS + + [ + [ + "atata", + "taata", + "atata", + "kkyun" + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.slice(offset=-n, length=None) @@ -4543,40 +5445,48 @@ def to_uppercase(self) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"fruits": ["apple", "mango", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["apple", "mango", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... upper_col=nw.col("fruits").str.to_uppercase() - ... ).to_native() + >>> def agnostic_to_uppercase(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.str.to_uppercase().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_uppercase`: - >>> my_library_agnostic_function(df_pd) # doctest: +NORMALIZE_WHITESPACE - fruits upper_col - 0 apple APPLE - 1 mango MANGO - 2 None None + >>> agnostic_to_uppercase(s_pd) + 0 APPLE + 1 MANGO + 2 None + dtype: object - >>> my_library_agnostic_function(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3, 2) - ┌────────┬───────────┐ - │ fruits ┆ upper_col │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞════════╪═══════════╡ - │ apple ┆ APPLE │ - │ mango ┆ MANGO │ - │ null ┆ null │ - └────────┴───────────┘ + >>> agnostic_to_uppercase(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [str] + [ + "APPLE" + "MANGO" + null + ] + >>> agnostic_to_uppercase(s_pa) # doctest: +ELLIPSIS + + [ + [ + "APPLE", + "MANGO", + null + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.to_uppercase() @@ -4591,40 +5501,48 @@ def to_lowercase(self) -> SeriesT: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT, IntoFrameT - >>> data = {"fruits": ["APPLE", "MANGO", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) + >>> from narwhals.typing import IntoSeriesT + + >>> data = ["APPLE", "MANGO", None] + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... lower_col=nw.col("fruits").str.to_lowercase() - ... ).to_native() + >>> def agnostic_to_lowercase(s_native: IntoSeriesT) -> IntoSeriesT: + ... s = nw.from_native(s_native, series_only=True) + ... 
return s.str.to_lowercase().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_lowercase`: - >>> my_library_agnostic_function(df_pd) # doctest: +NORMALIZE_WHITESPACE - fruits lower_col - 0 APPLE apple - 1 MANGO mango - 2 None None + >>> agnostic_to_lowercase(s_pd) + 0 apple + 1 mango + 2 None + dtype: object + >>> agnostic_to_lowercase(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [str] + [ + "apple" + "mango" + null + ] - >>> my_library_agnostic_function(df_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (3, 2) - ┌────────┬───────────┐ - │ fruits ┆ lower_col │ - │ --- ┆ --- │ - │ str ┆ str │ - ╞════════╪═══════════╡ - │ APPLE ┆ apple │ - │ MANGO ┆ mango │ - │ null ┆ null │ - └────────┴───────────┘ + >>> agnostic_to_lowercase(s_pa) # doctest: +ELLIPSIS + + [ + [ + "apple", + "mango", + null + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.str.to_lowercase() @@ -4656,6 +5574,7 @@ def to_datetime(self: Self, format: str | None = None) -> SeriesT: # noqa: A002 >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["2020-01-01", "2020-01-02"] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -4663,24 +5582,27 @@ def to_datetime(self: Self, format: str | None = None) -> SeriesT: # noqa: A002 We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_to_datetime(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.str.to_datetime(format="%Y-%m-%d").to_native() - We can then pass any supported library such as pandas, Polars, or PyArrow:: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_datetime`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_datetime(s_pd) 0 2020-01-01 1 2020-01-02 dtype: datetime64[ns] - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_to_datetime(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [datetime[μs]] [ 2020-01-01 00:00:00 2020-01-02 00:00:00 ] - >>> my_library_agnostic_function(s_pa) # doctest: +ELLIPSIS + + >>> agnostic_to_datetime(s_pa) # doctest: +ELLIPSIS [ [ @@ -4708,35 +5630,48 @@ def date(self: Self) -> SeriesT: NotImplementedError: If pandas default backend is being used. Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)] >>> s_pd = pd.Series(dates).convert_dtypes(dtype_backend="pyarrow") >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_date(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.dt.date().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_date`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_date(s_pd) 0 2012-01-07 1 2023-03-10 dtype: date32[day][pyarrow] - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_date(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [date] [ 2012-01-07 2023-03-10 ] + + >>> agnostic_date(s_pa) # doctest: +ELLIPSIS + + [ + [ + 2012-01-07, + 2023-03-10 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.date() @@ -4749,34 +5684,48 @@ def year(self: Self) -> SeriesT: A new Series containing the year component of each datetime value. Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [datetime(2012, 1, 7), datetime(2023, 3, 10)] >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_year(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.year().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_year`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_year(s_pd) 0 2012 1 2023 dtype: int... 
- >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_year(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i32] [ 2012 2023 ] + + >>> agnostic_year(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 2012, + 2023 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.year() @@ -4789,34 +5738,47 @@ def month(self: Self) -> SeriesT: A new Series containing the month component of each datetime value. Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [datetime(2023, 2, 1), datetime(2023, 8, 3)] >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_month(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.month().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_month`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_month(s_pd) 0 2 1 8 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_month(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i8] [ 2 8 ] + + >>> agnostic_month(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 2, + 8 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.month() @@ -4829,34 +5791,48 @@ def day(self: Self) -> SeriesT: A new Series containing the day component of each datetime value. 
Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [datetime(2022, 1, 1), datetime(2022, 1, 5)] >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_day(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.day().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_day`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_day(s_pd) 0 1 1 5 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_day(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i8] [ 1 5 ] + + >>> agnostic_day(s_pa) # doctest: +ELLIPSIS + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + 1, + 5 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.day() @@ -4869,34 +5845,48 @@ def hour(self: Self) -> SeriesT: A new Series containing the hour component of each datetime value. Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)] >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_hour(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.dt.hour().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_hour`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_hour(s_pd) 0 5 1 9 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_hour(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i8] [ 5 9 ] + + >>> agnostic_hour(s_pa) # doctest: +ELLIPSIS + + [ + [ + 5, + 9 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.hour() @@ -4909,34 +5899,48 @@ def minute(self: Self) -> SeriesT: A new Series containing the minute component of each datetime value. Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)] >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_minute(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.minute().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_minute`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_minute(s_pd) 0 3 1 12 dtype: int... 
- >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_minute(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i8] [ 3 12 ] + + >>> agnostic_minute(s_pa) # doctest: +ELLIPSIS + + [ + [ + 3, + 12 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.minute() @@ -4949,34 +5953,48 @@ def second(self: Self) -> SeriesT: A new Series containing the second component of each datetime value. Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [datetime(2022, 1, 1, 5, 3, 10), datetime(2022, 1, 5, 9, 12, 4)] >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_second(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.second().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_second`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_second(s_pd) 0 10 1 4 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_second(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i8] [ 10 4 ] + + >>> agnostic_second(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 4 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.second() @@ -4989,11 +6007,13 @@ def millisecond(self: Self) -> SeriesT: A new Series containing the millisecond component of each datetime value. 
Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [ ... datetime(2023, 5, 21, 12, 55, 10, 400000), ... datetime(2023, 5, 21, 12, 55, 10, 600000), @@ -5001,26 +6021,28 @@ def millisecond(self: Self) -> SeriesT: ... datetime(2023, 5, 21, 12, 55, 11, 0), ... datetime(2023, 5, 21, 12, 55, 11, 200000), ... ] - >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_millisecond(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.millisecond().alias("datetime").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_millisecond`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_millisecond(s_pd) 0 400 1 600 2 800 3 0 4 200 Name: datetime, dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_millisecond(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (5,) Series: 'datetime' [i32] [ @@ -5030,6 +6052,18 @@ def millisecond(self: Self) -> SeriesT: 0 200 ] + + >>> agnostic_millisecond(s_pa) # doctest: +ELLIPSIS + + [ + [ + 400, + 600, + 800, + 0, + 200 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.millisecond() @@ -5042,11 +6076,13 @@ def microsecond(self: Self) -> SeriesT: A new Series containing the microsecond component of each datetime value. 
Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [ ... datetime(2023, 5, 21, 12, 55, 10, 400000), ... datetime(2023, 5, 21, 12, 55, 10, 600000), @@ -5054,26 +6090,28 @@ def microsecond(self: Self) -> SeriesT: ... datetime(2023, 5, 21, 12, 55, 11, 0), ... datetime(2023, 5, 21, 12, 55, 11, 200000), ... ] - >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_microsecond(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.microsecond().alias("datetime").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_microsecond`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_microsecond(s_pd) 0 400000 1 600000 2 800000 3 0 4 200000 Name: datetime, dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_microsecond(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (5,) Series: 'datetime' [i32] [ @@ -5083,6 +6121,18 @@ def microsecond(self: Self) -> SeriesT: 0 200000 ] + + >>> agnostic_microsecond(s_pa) # doctest: +ELLIPSIS + + [ + [ + 400000, + 600000, + 800000, + 0, + 200000 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.microsecond() @@ -5095,37 +6145,51 @@ def nanosecond(self: Self) -> SeriesT: A new Series containing the nanosecond component of each datetime value. 
Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> dates = [ ... datetime(2022, 1, 1, 5, 3, 10, 500000), ... datetime(2022, 1, 5, 9, 12, 4, 60000), ... ] >>> s_pd = pd.Series(dates) >>> s_pl = pl.Series(dates) + >>> s_pa = pa.chunked_array([dates]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_nanosecond(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.nanosecond().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_nanosecond`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_nanosecond(s_pd) 0 500000000 1 60000000 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_nanosecond(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i32] [ 500000000 60000000 ] + + >>> agnostic_nanosecond(s_pa) # doctest: +ELLIPSIS + + [ + [ + 500000000, + 60000000 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.nanosecond() @@ -5138,34 +6202,49 @@ def ordinal_day(self: Self) -> SeriesT: A new Series containing the ordinal day (day of year) for each datetime value. 
Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import datetime + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_ordinal_day(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.ordinal_day().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_ordinal_day`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_ordinal_day(s_pd) 0 1 1 216 dtype: int32 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_ordinal_day(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i16] [ 1 216 ] + + + >>> agnostic_ordinal_day(s_pa) # doctest: +ELLIPSIS + + [ + [ + 1, + 216 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.ordinal_day() @@ -5183,34 +6262,48 @@ def total_minutes(self: Self) -> SeriesT: A new Series containing the total number of minutes for each timedelta value. 
Examples: + >>> from datetime import timedelta >>> import pandas as pd >>> import polars as pl - >>> from datetime import timedelta + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [timedelta(minutes=10), timedelta(minutes=20, seconds=40)] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_total_minutes(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.total_minutes().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_minutes`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_total_minutes(s_pd) 0 10 1 20 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_total_minutes(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i64] [ 10 20 ] + + >>> agnostic_total_minutes(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 20 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.total_minutes() @@ -5228,34 +6321,48 @@ def total_seconds(self: Self) -> SeriesT: A new Series containing the total number of seconds for each timedelta value. 
Examples: + >>> from datetime import timedelta >>> import pandas as pd >>> import polars as pl - >>> from datetime import timedelta + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_total_seconds(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.total_seconds().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_seconds`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_total_seconds(s_pd) 0 10 1 20 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_total_seconds(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i64] [ 10 20 ] + + >>> agnostic_total_seconds(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 20 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.total_seconds() @@ -5273,37 +6380,51 @@ def total_milliseconds(self: Self) -> SeriesT: A new Series containing the total number of milliseconds for each timedelta value. Examples: + >>> from datetime import timedelta >>> import pandas as pd >>> import polars as pl - >>> from datetime import timedelta + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [ ... timedelta(milliseconds=10), ... timedelta(milliseconds=20, microseconds=40), ... 
] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_total_milliseconds(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.dt.total_milliseconds().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_milliseconds`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_total_milliseconds(s_pd) 0 10 1 20 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_total_milliseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i64] [ 10 20 ] + + >>> agnostic_total_milliseconds(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 20 + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.total_milliseconds() @@ -5321,36 +6442,50 @@ def total_microseconds(self: Self) -> SeriesT: consider using `fill_null()` in this case. Examples: + >>> from datetime import timedelta >>> import pandas as pd >>> import polars as pl - >>> from datetime import timedelta + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [ ... timedelta(microseconds=10), ... timedelta(milliseconds=1, microseconds=200), ... ] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_total_microseconds(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.dt.total_microseconds().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_total_microseconds`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_total_microseconds(s_pd) 0 10 1 1200 dtype: int... - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_total_microseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i64] [ - 10 - 1200 + 10 + 1200 + ] + + >>> agnostic_total_microseconds(s_pa) # doctest: +ELLIPSIS + + [ + [ + 10, + 1200 + ] ] """ return self._narwhals_series._from_compliant_series( @@ -5369,28 +6504,32 @@ def total_nanoseconds(self: Self) -> SeriesT: A new Series containing the total number of nanoseconds for each timedelta value. Examples: + >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl - >>> from datetime import timedelta + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"] >>> s_pd = pd.to_datetime(pd.Series(data)) >>> s_pl = pl.Series(data).str.to_datetime(time_unit="ns") We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_total_nanoseconds(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.diff().dt.total_nanoseconds().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas or Polars + to `agnostic_total_nanoseconds`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_total_nanoseconds(s_pd) 0 NaN 1 1.0 dtype: float64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_total_nanoseconds(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [i64] [ @@ -5446,8 +6585,10 @@ def to_string(self: Self, format: str) -> SeriesT: # noqa: A002 >>> from datetime import datetime >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [ ... datetime(2020, 3, 1), ... datetime(2020, 4, 1), @@ -5455,22 +6596,24 @@ def to_string(self: Self, format: str) -> SeriesT: # noqa: A002 ... ] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) + >>> s_pa = pa.chunked_array([data]) We define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_to_string(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.dt.to_string("%Y/%m/%d").to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_string`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_string(s_pd) 0 2020/03/01 1 2020/04/01 2 2020/05/01 dtype: object - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_to_string(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [str] [ @@ -5478,6 +6621,16 @@ def to_string(self: Self, format: str) -> SeriesT: # noqa: A002 "2020/04/01" "2020/05/01" ] + + >>> agnostic_to_string(s_pa) # doctest: +ELLIPSIS + + [ + [ + "2020/03/01", + "2020/04/01", + "2020/05/01" + ] + ] """ return self._narwhals_series._from_compliant_series( self._narwhals_series._compliant_series.dt.to_string(format) @@ -5494,11 +6647,12 @@ def replace_time_zone(self: Self, time_zone: str | None) -> SeriesT: Examples: >>> from datetime import datetime, timezone - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [ ... datetime(2024, 1, 1, tzinfo=timezone.utc), ... datetime(2024, 1, 2, tzinfo=timezone.utc), @@ -5509,24 +6663,27 @@ def replace_time_zone(self: Self, time_zone: str | None) -> SeriesT: Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_replace_time_zone(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.dt.replace_time_zone("Asia/Kathmandu").to_native() - We can then pass pandas / PyArrow / Polars / any other supported library: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_replace_time_zone`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_replace_time_zone(s_pd) 0 2024-01-01 00:00:00+05:45 1 2024-01-02 00:00:00+05:45 dtype: datetime64[ns, Asia/Kathmandu] - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_replace_time_zone(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [datetime[μs, Asia/Kathmandu]] [ 2024-01-01 00:00:00 +0545 2024-01-02 00:00:00 +0545 ] - >>> my_library_agnostic_function(s_pa) + + >>> agnostic_replace_time_zone(s_pa) [ [ @@ -5553,11 +6710,12 @@ def convert_time_zone(self: Self, time_zone: str) -> SeriesT: Examples: >>> from datetime import datetime, timezone - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [ ... datetime(2024, 1, 1, tzinfo=timezone.utc), ... datetime(2024, 1, 2, tzinfo=timezone.utc), @@ -5568,24 +6726,27 @@ def convert_time_zone(self: Self, time_zone: str) -> SeriesT: Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_convert_time_zone(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.dt.convert_time_zone("Asia/Kathmandu").to_native() - We can then pass pandas / PyArrow / Polars / any other supported library: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_convert_time_zone`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_convert_time_zone(s_pd) 0 2024-01-01 05:45:00+05:45 1 2024-01-02 05:45:00+05:45 dtype: datetime64[ns, Asia/Kathmandu] - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_convert_time_zone(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (2,) Series: '' [datetime[μs, Asia/Kathmandu]] [ 2024-01-01 05:45:00 +0545 2024-01-02 05:45:00 +0545 ] - >>> my_library_agnostic_function(s_pa) + + >>> agnostic_convert_time_zone(s_pa) [ [ @@ -5613,11 +6774,12 @@ def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> Series Examples: >>> from datetime import date - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [date(2001, 1, 1), None, date(2001, 1, 3)] >>> s_pd = pd.Series(data, dtype="datetime64[ns]") >>> s_pl = pl.Series(data) @@ -5625,18 +6787,20 @@ def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> Series Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_timestamp(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... 
return s.dt.timestamp("ms").to_native() - We can then pass pandas / PyArrow / Polars / any other supported library: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_timestamp`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_timestamp(s_pd) 0 9.783072e+11 1 NaN 2 9.784800e+11 dtype: float64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + + >>> agnostic_timestamp(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: '' [i64] [ @@ -5644,7 +6808,8 @@ def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> Series null 978480000000 ] - >>> my_library_agnostic_function(s_pa) + + >>> agnostic_timestamp(s_pa) [ [ @@ -5678,11 +6843,12 @@ def len(self: Self) -> SeriesT: A new series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [[1, 2], [3, 4, None], None, []] Let's define a dataframe-agnostic function: diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 22afc687d..8bf4a4b1e 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -501,27 +501,32 @@ def to_frame(self) -> DataFrame[Any]: Examples: >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries, IntoDataFrame - >>> s = [1, 2, 3] - >>> s_pd = pd.Series(s, name="a") - >>> s_pl = pl.Series("a", s) + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 2] + >>> s_pd = pd.Series(data, name="a") + >>> s_pl = pl.Series("a", data) + >>> s_pa = pa.chunked_array([data]) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> IntoDataFrame: + >>> def agnostic_to_frame(s_native: 
IntoSeries) -> IntoDataFrame: ... s = nw.from_native(s_native, series_only=True) ... return s.to_frame().to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_to_frame`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_to_frame(s_pd) a 0 1 1 2 - 2 3 - >>> my_library_agnostic_function(s_pl) - shape: (3, 1) + + >>> agnostic_to_frame(s_pl) + shape: (2, 1) ┌─────┐ │ a │ │ --- │ @@ -529,8 +534,13 @@ def to_frame(self) -> DataFrame[Any]: ╞═════╡ │ 1 │ │ 2 │ - │ 3 │ └─────┘ + + >>> agnostic_to_frame(s_pa) + pyarrow.Table + : int64 + ---- + : [[1,2]] """ return super().to_frame() # type: ignore[return-value] @@ -558,28 +568,34 @@ def value_counts( - Either count or proportion as second column, depending on normalize parameter. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeries, IntoDataFrame >>> import pandas as pd >>> import polars as pl - >>> s_pd = pd.Series([1, 1, 2, 3, 2], name="s") - >>> s_pl = pl.Series(values=[1, 1, 2, 3, 2], name="s") + >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoDataFrame + >>> from narwhals.typing import IntoSeries + + >>> data = [1, 1, 2, 3, 2] + >>> s_pd = pd.Series(data, name="s") + >>> s_pl = pl.Series(values=data, name="s") + >>> s_pa = pa.chunked_array([data]) Let's define a dataframe-agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeries) -> IntoDataFrame: + >>> def agnostic_value_counts(s_native: IntoSeries) -> IntoDataFrame: ... s = nw.from_native(s_native, series_only=True) ... 
return s.value_counts(sort=True).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_value_counts`: - >>> my_library_agnostic_function(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_value_counts(s_pd) s count 0 1 2 1 2 2 2 3 1 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_value_counts(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3, 2) ┌─────┬───────┐ │ s ┆ count │ @@ -590,6 +606,14 @@ def value_counts( │ 2 ┆ 2 │ │ 3 ┆ 1 │ └─────┴───────┘ + + >>> agnostic_value_counts(s_pa) + pyarrow.Table + : int64 + count: int64 + ---- + : [[1,2,3]] + count: [[2,2,1]] """ return super().value_counts( # type: ignore[return-value] sort=sort, parallel=parallel, name=name, normalize=normalize @@ -652,25 +676,27 @@ def ewm_mean( >>> import polars as pl >>> import narwhals as nw >>> from narwhals.typing import IntoSeriesT + >>> data = [1, 2, 3] >>> s_pd = pd.Series(name="a", data=data) >>> s_pl = pl.Series(name="a", values=data) We define a library agnostic function: - >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_ewm_mean(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.ewm_mean(com=1, ignore_nulls=False).to_native() - We can then pass either pandas or Polars to `func`: + We can then pass any supported library such as pandas or Polars + to `agnostic_ewm_mean`: - >>> my_library_agnostic_function(s_pd) + >>> agnostic_ewm_mean(s_pd) 0 1.000000 1 1.666667 2 2.428571 Name: a, dtype: float64 - >>> my_library_agnostic_function(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> agnostic_ewm_mean(s_pl) # doctest: +NORMALIZE_WHITESPACE shape: (3,) Series: 'a' [f64] [ @@ -729,11 +755,12 @@ def rolling_sum( A new series. 
Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.0, 2.0, 3.0, 4.0] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -745,7 +772,8 @@ def rolling_sum( ... s = nw.from_native(s_native, series_only=True) ... return s.rolling_sum(window_size=2).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_sum`: >>> agnostic_rolling_sum(s_pd) 0 NaN @@ -821,11 +849,12 @@ def rolling_mean( A new series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.0, 2.0, 3.0, 4.0] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -837,7 +866,8 @@ def rolling_mean( ... s = nw.from_native(s_native, series_only=True) ... return s.rolling_mean(window_size=2).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_mean`: >>> agnostic_rolling_mean(s_pd) 0 NaN @@ -915,11 +945,12 @@ def rolling_var( A new series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.0, 3.0, 1.0, 4.0] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -931,7 +962,8 @@ def rolling_var( ... s = nw.from_native(s_native, series_only=True) ... 
return s.rolling_var(window_size=2, min_periods=1).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_var`: >>> agnostic_rolling_var(s_pd) 0 NaN @@ -1010,11 +1042,12 @@ def rolling_std( A new series. Examples: - >>> import narwhals as nw - >>> from narwhals.typing import IntoSeriesT >>> import pandas as pd >>> import polars as pl >>> import pyarrow as pa + >>> import narwhals as nw + >>> from narwhals.typing import IntoSeriesT + >>> data = [1.0, 3.0, 1.0, 4.0] >>> s_pd = pd.Series(data) >>> s_pl = pl.Series(data) @@ -1026,7 +1059,8 @@ def rolling_std( ... s = nw.from_native(s_native, series_only=True) ... return s.rolling_std(window_size=2, min_periods=1).to_native() - We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`: + We can then pass any supported library such as pandas, Polars, or + PyArrow to `agnostic_rolling_std`: >>> agnostic_rolling_std(s_pd) 0 NaN