diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/404.html b/404.html new file mode 100644 index 000000000..771c84ed3 --- /dev/null +++ b/404.html @@ -0,0 +1,1538 @@ + + + +
+ + + + + + + + + + + + + + +Class | +Method | +pandas-like | +arrow | +
---|---|---|---|
DataFrame | +clone | ++ | + |
DataFrame | +collect_schema | ++ | + |
DataFrame | +columns | ++ | + |
DataFrame | +drop | ++ | + |
DataFrame | +drop_nulls | ++ | + |
DataFrame | +estimated_size | ++ | + |
DataFrame | +filter | ++ | + |
DataFrame | +gather_every | ++ | + |
DataFrame | +get_column | ++ | + |
DataFrame | +group_by | ++ | + |
DataFrame | +head | ++ | + |
DataFrame | +implementation | ++ | + |
DataFrame | +is_duplicated | ++ | + |
DataFrame | +is_empty | ++ | + |
DataFrame | +is_unique | ++ | + |
DataFrame | +item | ++ | + |
DataFrame | +iter_rows | ++ | + |
DataFrame | +join | ++ | + |
DataFrame | +join_asof | ++ | + |
DataFrame | +lazy | ++ | + |
DataFrame | +null_count | ++ | + |
DataFrame | +pipe | ++ | + |
DataFrame | +pivot | ++ | + |
DataFrame | +rename | ++ | + |
DataFrame | +row | ++ | + |
DataFrame | +rows | ++ | + |
DataFrame | +sample | ++ | + |
DataFrame | +schema | ++ | + |
DataFrame | +select | ++ | + |
DataFrame | +shape | ++ | + |
DataFrame | +sort | ++ | + |
DataFrame | +tail | ++ | + |
DataFrame | +to_arrow | ++ | + |
DataFrame | +to_dict | ++ | + |
DataFrame | +to_native | ++ | + |
DataFrame | +to_numpy | ++ | + |
DataFrame | +to_pandas | ++ | + |
DataFrame | +unique | ++ | + |
DataFrame | +unpivot | ++ | + |
DataFrame | +with_columns | ++ | + |
DataFrame | +with_row_index | ++ | + |
DataFrame | +write_csv | ++ | + |
DataFrame | +write_parquet | ++ | + |
Class | +Method | +pandas-like | +arrow | +dask | +
---|---|---|---|---|
Expr | +abs | ++ | + | + |
Expr | +alias | ++ | + | + |
Expr | +all | ++ | + | + |
Expr | +any | ++ | + | + |
Expr | +arg_max | ++ | + | + |
Expr | +arg_min | ++ | + | + |
Expr | +arg_true | ++ | + | + |
Expr | +cast | ++ | + | + |
Expr | +cat | ++ | + | + |
Expr | +clip | ++ | + | + |
Expr | +count | ++ | + | + |
Expr | +cum_count | ++ | + | + |
Expr | +cum_max | ++ | + | + |
Expr | +cum_min | ++ | + | + |
Expr | +cum_prod | ++ | + | + |
Expr | +cum_sum | ++ | + | + |
Expr | +diff | ++ | + | + |
Expr | +drop_nulls | ++ | + | + |
Expr | +dt | ++ | + | + |
Expr | +ewm_mean | ++ | + | + |
Expr | +fill_null | ++ | + | + |
Expr | +filter | ++ | + | + |
Expr | +gather_every | ++ | + | + |
Expr | +head | ++ | + | + |
Expr | +is_between | ++ | + | + |
Expr | +is_duplicated | ++ | + | + |
Expr | +is_finite | ++ | + | + |
Expr | +is_first_distinct | ++ | + | + |
Expr | +is_in | ++ | + | + |
Expr | +is_last_distinct | ++ | + | + |
Expr | +is_null | ++ | + | + |
Expr | +is_unique | ++ | + | + |
Expr | +len | ++ | + | + |
Expr | +list | ++ | + | + |
Expr | +map_batches | ++ | + | + |
Expr | +max | ++ | + | + |
Expr | +mean | ++ | + | + |
Expr | +median | ++ | + | + |
Expr | +min | ++ | + | + |
Expr | +mode | ++ | + | + |
Expr | +n_unique | ++ | + | + |
Expr | +name | ++ | + | + |
Expr | +null_count | ++ | + | + |
Expr | +over | ++ | + | + |
Expr | +pipe | ++ | + | + |
Expr | +quantile | ++ | + | + |
Expr | +replace_strict | ++ | + | + |
Expr | +rolling_mean | ++ | + | + |
Expr | +rolling_sum | ++ | + | + |
Expr | +round | ++ | + | + |
Expr | +sample | ++ | + | + |
Expr | +shift | ++ | + | + |
Expr | +skew | ++ | + | + |
Expr | +sort | ++ | + | + |
Expr | +std | ++ | + | + |
Expr | +str | ++ | + | + |
Expr | +sum | ++ | + | + |
Expr | +tail | ++ | + | + |
Expr | +unique | ++ | + | + |
ExprCatNamespace | +get_categories | ++ | + | + |
ExprDateTimeNamespace | +convert_time_zone | ++ | + | + |
ExprDateTimeNamespace | +date | ++ | + | + |
ExprDateTimeNamespace | +day | ++ | + | + |
ExprDateTimeNamespace | +hour | ++ | + | + |
ExprDateTimeNamespace | +microsecond | ++ | + | + |
ExprDateTimeNamespace | +millisecond | ++ | + | + |
ExprDateTimeNamespace | +minute | ++ | + | + |
ExprDateTimeNamespace | +month | ++ | + | + |
ExprDateTimeNamespace | +nanosecond | ++ | + | + |
ExprDateTimeNamespace | +ordinal_day | ++ | + | + |
ExprDateTimeNamespace | +replace_time_zone | ++ | + | + |
ExprDateTimeNamespace | +second | ++ | + | + |
ExprDateTimeNamespace | +timestamp | ++ | + | + |
ExprDateTimeNamespace | +to_string | ++ | + | + |
ExprDateTimeNamespace | +total_microseconds | ++ | + | + |
ExprDateTimeNamespace | +total_milliseconds | ++ | + | + |
ExprDateTimeNamespace | +total_minutes | ++ | + | + |
ExprDateTimeNamespace | +total_nanoseconds | ++ | + | + |
ExprDateTimeNamespace | +total_seconds | ++ | + | + |
ExprDateTimeNamespace | +year | ++ | + | + |
ExprListNamespace | +len | ++ | + | + |
ExprNameNamespace | +keep | ++ | + | + |
ExprNameNamespace | +map | ++ | + | + |
ExprNameNamespace | +prefix | ++ | + | + |
ExprNameNamespace | +suffix | ++ | + | + |
ExprNameNamespace | +to_lowercase | ++ | + | + |
ExprNameNamespace | +to_uppercase | ++ | + | + |
ExprStringNamespace | +contains | ++ | + | + |
ExprStringNamespace | +ends_with | ++ | + | + |
ExprStringNamespace | +head | ++ | + | + |
ExprStringNamespace | +len_chars | ++ | + | + |
ExprStringNamespace | +replace | ++ | + | + |
ExprStringNamespace | +replace_all | ++ | + | + |
ExprStringNamespace | +slice | ++ | + | + |
ExprStringNamespace | +starts_with | ++ | + | + |
ExprStringNamespace | +strip_chars | ++ | + | + |
ExprStringNamespace | +tail | ++ | + | + |
ExprStringNamespace | +to_datetime | ++ | + | + |
ExprStringNamespace | +to_lowercase | ++ | + | + |
ExprStringNamespace | +to_uppercase | ++ | + | + |
Narwhals has two different levels of support for libraries: "full" and "interchange".
+For libraries with full support, we intend to support the whole Narwhals API; +however, this is a continuous work in progress.
+In the following section it is possible to check which method is implemented for which +class and backend.
+Info
+Class | +Method | +arrow | +dask | +
---|---|---|---|
LazyFrame | +clone | ++ | + |
LazyFrame | +collect | ++ | + |
LazyFrame | +collect_schema | ++ | + |
LazyFrame | +columns | ++ | + |
LazyFrame | +drop | ++ | + |
LazyFrame | +drop_nulls | ++ | + |
LazyFrame | +filter | ++ | + |
LazyFrame | +gather_every | ++ | + |
LazyFrame | +group_by | ++ | + |
LazyFrame | +head | ++ | + |
LazyFrame | +implementation | ++ | + |
LazyFrame | +join | ++ | + |
LazyFrame | +join_asof | ++ | + |
LazyFrame | +lazy | ++ | + |
LazyFrame | +pipe | ++ | + |
LazyFrame | +rename | ++ | + |
LazyFrame | +schema | ++ | + |
LazyFrame | +select | ++ | + |
LazyFrame | +sort | ++ | + |
LazyFrame | +tail | ++ | + |
LazyFrame | +to_native | ++ | + |
LazyFrame | +unique | ++ | + |
LazyFrame | +unpivot | ++ | + |
LazyFrame | +with_columns | ++ | + |
LazyFrame | +with_row_index | ++ | + |
Class | +Method | +pandas-like | +arrow | +
---|---|---|---|
Series | +abs | ++ | + |
Series | +alias | ++ | + |
Series | +all | ++ | + |
Series | +any | ++ | + |
Series | +arg_max | ++ | + |
Series | +arg_min | ++ | + |
Series | +arg_true | ++ | + |
Series | +cast | ++ | + |
Series | +cat | ++ | + |
Series | +clip | ++ | + |
Series | +count | ++ | + |
Series | +cum_count | ++ | + |
Series | +cum_max | ++ | + |
Series | +cum_min | ++ | + |
Series | +cum_prod | ++ | + |
Series | +cum_sum | ++ | + |
Series | +diff | ++ | + |
Series | +drop_nulls | ++ | + |
Series | +dt | ++ | + |
Series | +dtype | ++ | + |
Series | +ewm_mean | ++ | + |
Series | +fill_null | ++ | + |
Series | +filter | ++ | + |
Series | +gather_every | ++ | + |
Series | +head | ++ | + |
Series | +implementation | ++ | + |
Series | +is_between | ++ | + |
Series | +is_duplicated | ++ | + |
Series | +is_empty | ++ | + |
Series | +is_finite | ++ | + |
Series | +is_first_distinct | ++ | + |
Series | +is_in | ++ | + |
Series | +is_last_distinct | ++ | + |
Series | +is_null | ++ | + |
Series | +is_sorted | ++ | + |
Series | +is_unique | ++ | + |
Series | +item | ++ | + |
Series | +len | ++ | + |
Series | +list | ++ | + |
Series | +max | ++ | + |
Series | +mean | ++ | + |
Series | +median | ++ | + |
Series | +min | ++ | + |
Series | +mode | ++ | + |
Series | +n_unique | ++ | + |
Series | +name | ++ | + |
Series | +null_count | ++ | + |
Series | +pipe | ++ | + |
Series | +quantile | ++ | + |
Series | +rename | ++ | + |
Series | +replace_strict | ++ | + |
Series | +rolling_mean | ++ | + |
Series | +rolling_sum | ++ | + |
Series | +round | ++ | + |
Series | +sample | ++ | + |
Series | +scatter | ++ | + |
Series | +shape | ++ | + |
Series | +shift | ++ | + |
Series | +skew | ++ | + |
Series | +sort | ++ | + |
Series | +std | ++ | + |
Series | +str | ++ | + |
Series | +sum | ++ | + |
Series | +tail | ++ | + |
Series | +to_arrow | ++ | + |
Series | +to_dummies | ++ | + |
Series | +to_frame | ++ | + |
Series | +to_list | ++ | + |
Series | +to_native | ++ | + |
Series | +to_numpy | ++ | + |
Series | +to_pandas | ++ | + |
Series | +unique | ++ | + |
Series | +value_counts | ++ | + |
Series | +zip_with | ++ | + |
SeriesCatNamespace | +get_categories | ++ | + |
SeriesDateTimeNamespace | +convert_time_zone | ++ | + |
SeriesDateTimeNamespace | +date | ++ | + |
SeriesDateTimeNamespace | +day | ++ | + |
SeriesDateTimeNamespace | +hour | ++ | + |
SeriesDateTimeNamespace | +microsecond | ++ | + |
SeriesDateTimeNamespace | +millisecond | ++ | + |
SeriesDateTimeNamespace | +minute | ++ | + |
SeriesDateTimeNamespace | +month | ++ | + |
SeriesDateTimeNamespace | +nanosecond | ++ | + |
SeriesDateTimeNamespace | +ordinal_day | ++ | + |
SeriesDateTimeNamespace | +replace_time_zone | ++ | + |
SeriesDateTimeNamespace | +second | ++ | + |
SeriesDateTimeNamespace | +timestamp | ++ | + |
SeriesDateTimeNamespace | +to_string | ++ | + |
SeriesDateTimeNamespace | +total_microseconds | ++ | + |
SeriesDateTimeNamespace | +total_milliseconds | ++ | + |
SeriesDateTimeNamespace | +total_minutes | ++ | + |
SeriesDateTimeNamespace | +total_nanoseconds | ++ | + |
SeriesDateTimeNamespace | +total_seconds | ++ | + |
SeriesDateTimeNamespace | +year | ++ | + |
SeriesListNamespace | +len | ++ | + |
SeriesStringNamespace | +contains | ++ | + |
SeriesStringNamespace | +ends_with | ++ | + |
SeriesStringNamespace | +head | ++ | + |
SeriesStringNamespace | +len_chars | ++ | + |
SeriesStringNamespace | +replace | ++ | + |
SeriesStringNamespace | +replace_all | ++ | + |
SeriesStringNamespace | +slice | ++ | + |
SeriesStringNamespace | +starts_with | ++ | + |
SeriesStringNamespace | +strip_chars | ++ | + |
SeriesStringNamespace | +tail | ++ | + |
SeriesStringNamespace | +to_datetime | ++ | + |
SeriesStringNamespace | +to_lowercase | ++ | + |
SeriesStringNamespace | +to_uppercase | ++ | + |
narwhals.DataFrame
Narwhals DataFrame, backed by a native dataframe.
+The native dataframe might be pandas.DataFrame, polars.DataFrame, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
.
columns: list[str]
+
+
+ property
+
+
+Get column names.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.columns
+
We can pass any supported library such as pandas, Polars, or PyArrow to agnostic_columns
:
>>> agnostic_columns(df_pd)
+['foo', 'bar', 'ham']
+>>> agnostic_columns(df_pl)
+['foo', 'bar', 'ham']
+>>> agnostic_columns(df_pa)
+['foo', 'bar', 'ham']
+
implementation: Implementation
+
+
+ property
+
+
+Return implementation of native frame.
+This can be useful when you need to do some special-casing for +some libraries for features outside of Narwhals' scope - for +example, when dealing with pandas' Period Dtype.
+ + +Returns:
+Type | +Description | +
---|---|
+ Implementation
+ |
+
+
+
+ Implementation. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> df_native = pd.DataFrame({"a": [1, 2, 3]})
+>>> df = nw.from_native(df_native)
+>>> df.implementation
+<Implementation.PANDAS: 1>
+>>> df.implementation.is_pandas()
+True
+>>> df.implementation.is_pandas_like()
+True
+>>> df.implementation.is_polars()
+False
+
schema: Schema
+
+
+ property
+
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.schema import Schema
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> def agnostic_schema(df_native: IntoFrame) -> Schema:
+... df = nw.from_native(df_native)
+... return df.schema
+
You can pass either pandas or Polars to agnostic_schema
:
>>> df_pd_schema = agnostic_schema(df_pd)
+>>> df_pd_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
>>> df_pl_schema = agnostic_schema(df_pl)
+>>> df_pl_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
shape: tuple[int, int]
+
+
+ property
+
+
+Get the shape of the DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ tuple[int, int]
+ |
+
+
+
+ The shape of the dataframe as a tuple. + |
+
Examples:
+Construct pandas and Polars DataFrames and a PyArrow table:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>>
+>>> df = {"foo": [1, 2, 3, 4, 5]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
+... df = nw.from_native(df_native)
+... return df.shape
+
We can then pass either pandas, Polars or PyArrow to agnostic_shape
:
>>> agnostic_shape(df_pd)
+(5, 1)
+>>> agnostic_shape(df_pl)
+(5, 1)
+>>> agnostic_shape(df_pa)
+(5, 1)
+
__arrow_c_stream__(requested_schema=None)
+
+Export a DataFrame via the Arrow PyCapsule Interface.
+to_arrow
and then defer to PyArrow's implementation. See PyCapsule Interface +for more.
+ +__getitem__(item)
+
+Extract column or slice of DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ item
+ |
+
+ str | slice | Sequence[int] | Sequence[str] | tuple[Sequence[int], str | int] | tuple[slice, str | int] | tuple[slice | Sequence[int], Sequence[int] | Sequence[str] | slice] | tuple[slice, slice]
+ |
+
+
+
+ How to slice dataframe. What happens depends on what is passed. It's easiest
+to explain by example. Suppose we have a Dataframe
|
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Series[Any] | Self
+ |
+
+
+
+ A Narwhals Series, backed by a native series. + |
+
In contrast with Polars, pandas allows non-string column names.
+If you don't know whether the column name you're trying to extract
+is definitely a string (e.g. df[df.columns[0]]
) then you should
+use DataFrame.get_column
instead.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+>>>
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_slice(df_native: IntoDataFrame) -> IntoSeries:
+... df = nw.from_native(df_native)
+... return df["a"].to_native()
+
We can then pass either pandas, Polars or PyArrow to agnostic_slice
:
>>> agnostic_slice(df_pd)
+0 1
+1 2
+Name: a, dtype: int64
+>>> agnostic_slice(df_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+>>> agnostic_slice(df_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2
+ ]
+]
+
clone()
+
+Create a copy of this DataFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which we clone the DataFrame:
+>>> @nw.narwhalify
+... def func(df):
+... return df.clone()
+
>>> func(df_pd)
+ a b
+0 1 3
+1 2 4
+
>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
collect_schema()
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.schema import Schema
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> def agnostic_collect_schema(df_native: IntoFrame) -> Schema:
+... df = nw.from_native(df_native)
+... return df.collect_schema()
+
You can pass either pandas or Polars to agnostic_collect_schema
:
>>> df_pd_schema = agnostic_collect_schema(df_pd)
+>>> df_pd_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
>>> df_pl_schema = agnostic_collect_schema(df_pl)
+>>> df_pl_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
drop(*columns, strict=True)
+
+Remove columns from the dataframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *columns
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
+ strict
+ |
+
+ bool
+ |
+
+
+
+ Validate that all column names exist in the schema and throw an +exception if a column name does not exist in the schema. + |
+
+ True
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("ham")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar
+0 1 6.0
+1 2 7.0
+2 3 8.0
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Use positional arguments to drop multiple columns.
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("foo", "ham")
+
>>> func(df_pd)
+ bar
+0 6.0
+1 7.0
+2 8.0
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
drop_nulls(subset=None)
+
+Drop null values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) for which null values are considered. If set to None +(default), use all columns. + |
+
+ None
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.drop_nulls().to_native()
+
We can then pass either pandas or Polars:
+>>> agnostic_drop_nulls(df_pd)
+ a ba
+0 1.0 1.0
+>>> agnostic_drop_nulls(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ ba │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 1.0 ┆ 1.0 │
+└─────┴─────┘
+
estimated_size(unit='b')
+
+Return an estimation of the total (heap) allocated size of the DataFrame
.
Estimated size is given in the specified unit (bytes by default).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ unit
+ |
+
+ SizeUnit
+ |
+
+
+
+ 'b', 'kb', 'mb', 'gb', 'tb', 'bytes', 'kilobytes', 'megabytes', + 'gigabytes', or 'terabytes'. + |
+
+ 'b'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ int | float
+ |
+
+
+
+ Integer or Float. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrameT
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_estimated_size(df_native: IntoDataFrameT) -> int | float:
+... df = nw.from_native(df_native)
+... return df.estimated_size()
+
We can then pass either pandas, Polars or PyArrow to agnostic_estimated_size
:
>>> agnostic_estimated_size(df_pd)
+np.int64(330)
+>>> agnostic_estimated_size(df_pl)
+51
+>>> agnostic_estimated_size(df_pa)
+63
+
filter(*predicates, **constraints)
+
+Filter the rows in the DataFrame based on one or more predicate expressions.
+The original order of the remaining rows is preserved.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr] | list[bool]
+ |
+
+
+
+ Expression(s) that evaluates to a boolean Series. Can +also be a (single!) boolean list. + |
+
+ ()
+ |
+
+ **constraints
+ |
+
+ Any
+ |
+
+
+
+ Column filters; use |
+
+ {}
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we filter on +one condition.
+>>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame:
+... df = nw.from_native(df_native)
+... return df.filter(nw.col("foo") > 1).to_native()
+
We can then pass either pandas or Polars to agnostic_filter
:
>>> agnostic_filter(df_pd)
+ foo bar ham
+1 2 7 b
+2 3 8 c
+>>> agnostic_filter(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions, combined with and/or operators:
+>>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame:
+... df = nw.from_native(df_native)
+... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")).to_native()
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
>>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame:
+... df = nw.from_native(df_native)
+... dframe = df.filter(
+... (nw.col("foo") == 1) | (nw.col("ham") == "c")
+... ).to_native()
+... return dframe
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+2 3 8 c
+>>> agnostic_filter(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame:
+... df = nw.from_native(df_native)
+... dframe = df.filter(
+... nw.col("foo") <= 2,
+... ~nw.col("ham").is_in(["b", "c"]),
+... ).to_native()
+... return dframe
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Provide multiple filters using **kwargs
syntax:
>>> def agnostic_filter(df_native: IntoFrame) -> IntoFrame:
+... df = nw.from_native(df_native)
+... return df.filter(foo=2, ham="b").to_native()
+>>> agnostic_filter(df_pd)
+ foo bar ham
+1 2 7 b
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+└─────┴─────┴─────┘
+
gather_every(n, offset=0)
+
+Take every nth row in the DataFrame and return as a new DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which we gather every 2nd row, +starting from an offset of 1:
+>>> @nw.narwhalify
+... def func(df):
+... return df.gather_every(n=2, offset=1)
+
>>> func(df_pd)
+ a b
+1 2 6
+3 4 8
+
>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 6 │
+│ 4 ┆ 8 │
+└─────┴─────┘
+
get_column(name)
+
+Get a single column by name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The column name as a string. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Series[Any]
+ |
+
+
+
+ A Narwhals Series, backed by a native series. + |
+
Although name
is typed as str
, pandas does allow non-string column
+names, and they will work when passed to this function if the
+narwhals.DataFrame
is backed by a pandas dataframe with non-string
+columns. This function can only be used to extract a column by name, so
+there is no risk of ambiguity.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+>>>
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> def agnostic_get_column(df_native: IntoDataFrame) -> IntoSeries:
+... df = nw.from_native(df_native)
+... name = df.columns[0]
+... return df.get_column(name).to_native()
+
We can then pass either pandas or Polars to agnostic_get_column
:
>>> agnostic_get_column(df_pd)
+0 1
+1 2
+Name: a, dtype: int64
+>>> agnostic_get_column(df_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+
group_by(*keys, drop_null_keys=False)
+
+Start a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts multiple column names as a list. + |
+
+ ()
+ |
+
+ drop_null_keys
+ |
+
+ bool
+ |
+
+
+
+ If True, then groups where any key is null won't be included +in the result. + |
+
+ False
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
GroupBy |
+ GroupBy[Self]
+ |
+
+
+
+ Object which can be used to perform aggregations. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we group by one column
+and call agg
to compute the grouped sum of another column.
>>> @nw.narwhalify
+... def func(df):
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b")
+>>> func(df_pd)
+ a b c
+0 a 1 5
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n=5)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the last |
+
+ 5
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function that gets the first 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.head(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
is_duplicated()
+
+Get a mask of all duplicated rows in this DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ Series[Any]
+ |
+
+
+
+ A new Series. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.is_duplicated()
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 True
+1 False
+2 False
+3 True
+dtype: bool
+
>>> func(df_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ true
+ false
+ false
+ true
+]
+
is_empty()
+
+Check if the dataframe is empty.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that filters rows in which "foo" +values are greater than 10, and then checks if the result is empty or not:
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter(nw.col("foo") > 10).is_empty()
+
We can then pass either pandas or Polars to func
:
>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+>>> func(df_pd), func(df_pl)
+(True, True)
+
>>> df_pd = pd.DataFrame({"foo": [100, 2, 3], "bar": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"foo": [100, 2, 3], "bar": [4, 5, 6]})
+>>> func(df_pd), func(df_pl)
+(False, False)
+
is_unique()
+
+Get a mask of all unique rows in this DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ Series[Any]
+ |
+
+
+
+ A new Series. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.is_unique()
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 False
+1 True
+2 True
+3 False
+dtype: bool
+
>>> func(df_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+ false
+]
+
item(row=None, column=None)
+
+Return the DataFrame as a scalar, or return the element at the given row/column.
+ + +If row/col are not provided, this is equivalent to df[0,0], with a check that the shape is (1,1). +With row/col, this is equivalent to df[row,col].
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that returns the item at a given row/column:
+>>> @nw.narwhalify
+... def func(df, row, column):
+... return df.item(row, column)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd, 1, 1), func(df_pd, 2, "b")
+(np.int64(5), np.int64(6))
+
>>> func(df_pl, 1, 1), func(df_pl, 2, "b")
+(5, 6)
+
iter_rows(*, named=False, buffer_size=512)
+
+Returns an iterator over the rows of the DataFrame as Python-native values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ named
+ |
+
+ bool
+ |
+
+
+
+ By default, each row is returned as a tuple of values given +in the same order as the frame columns. Setting named=True will +return rows of dictionaries instead. + |
+
+ False
+ |
+
+ buffer_size
+ |
+
+ int
+ |
+
+
+
+ Determines the number of rows that are buffered +internally while iterating over the data. +See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html + |
+
+ 512
+ |
+
cuDF doesn't support this method.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df, *, named):
+... return df.iter_rows(named=named)
+
We can then pass either pandas or Polars to func
:
>>> [row for row in func(df_pd, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in func(df_pd, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> [row for row in func(df_pl, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in func(df_pl, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+
join(other, on=None, how='inner', *, left_on=None, right_on=None, suffix='_right')
+
+Join in SQL-like fashion.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Name(s) of the join columns in both DataFrames. If set, |
+
+ None
+ |
+
+ how
+ |
+
+ Literal['inner', 'left', 'cross', 'semi', 'anti']
+ |
+
+
+
+ Join strategy. +
|
+
+ 'inner'
+ |
+
+ left_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the left DataFrame. + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the right DataFrame. + |
+
+ None
+ |
+
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to append to columns with a duplicate name. + |
+
+ '_right'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> data_other = {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+
>>> df_pd = pd.DataFrame(data)
+>>> other_pd = pd.DataFrame(data_other)
+
>>> df_pl = pl.DataFrame(data)
+>>> other_pl = pl.DataFrame(data_other)
+
Let's define a dataframe-agnostic function in which we join over "ham" column:
+>>> @nw.narwhalify
+... def join_on_ham(df, other_any):
+... return df.join(other_any, left_on="ham", right_on="ham")
+
We can now pass either pandas or Polars to the function:
+>>> join_on_ham(df_pd, other_pd)
+ foo bar ham apple
+0 1 6.0 a x
+1 2 7.0 b y
+
>>> join_on_ham(df_pl, other_pl)
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
join_asof(other, *, left_on=None, right_on=None, on=None, by_left=None, by_right=None, by=None, strategy='backward')
+
+Perform an asof join.
+This is similar to a left-join except that we match on nearest key rather than equal keys.
+Both DataFrames must be sorted by the asof_join key.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
+ left_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+
+ None
+ |
+
+ on
+ |
+
+ str | None
+ |
+
+
+
+ Join column of both DataFrames. If set, left_on and right_on should be None. + |
+
+ None
+ |
+
+ by_left
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join on these columns of the left DataFrame before doing the asof join. + |
+
+ None
+ |
+
+ by_right
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join on these columns of the right DataFrame before doing the asof join. + |
+
+ None
+ |
+
+ by
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['backward', 'forward', 'nearest']
+ |
+
+
+
+ Join strategy. The default is "backward". +
|
+
+ 'backward'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data_gdp = {
+... "datetime": [
+... datetime(2016, 1, 1),
+... datetime(2017, 1, 1),
+... datetime(2018, 1, 1),
+... datetime(2019, 1, 1),
+... datetime(2020, 1, 1),
+... ],
+... "gdp": [4164, 4411, 4566, 4696, 4827],
+... }
+>>> data_population = {
+... "datetime": [
+... datetime(2016, 3, 1),
+... datetime(2018, 8, 1),
+... datetime(2019, 1, 1),
+... ],
+... "population": [82.19, 82.66, 83.12],
+... }
+>>> gdp_pd = pd.DataFrame(data_gdp)
+>>> population_pd = pd.DataFrame(data_population)
+
>>> gdp_pl = pl.DataFrame(data_gdp).sort("datetime")
+>>> population_pl = pl.DataFrame(data_population).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" column:
+>>> @nw.narwhalify
+... def join_asof_datetime(df, other_any, strategy):
+... return df.join_asof(other_any, on="datetime", strategy=strategy)
+
We can now pass either pandas or Polars to the function:
+>>> join_asof_datetime(population_pd, gdp_pd, strategy="backward")
+ datetime population gdp
+0 2016-03-01 82.19 4164
+1 2018-08-01 82.66 4566
+2 2019-01-01 83.12 4696
+
>>> join_asof_datetime(population_pl, gdp_pl, strategy="backward")
+shape: (3, 3)
+┌─────────────────────┬────────────┬──────┐
+│ datetime ┆ population ┆ gdp │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ f64 ┆ i64 │
+╞═════════════════════╪════════════╪══════╡
+│ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │
+│ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │
+│ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │
+└─────────────────────┴────────────┴──────┘
+
Here is a real-world time-series example that uses the by
argument.
>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data_quotes = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 30),
+... datetime(2016, 5, 25, 13, 30, 0, 41),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 49),
+... datetime(2016, 5, 25, 13, 30, 0, 72),
+... datetime(2016, 5, 25, 13, 30, 0, 75),
+... ],
+... "ticker": [
+... "GOOG",
+... "MSFT",
+... "MSFT",
+... "MSFT",
+... "GOOG",
+... "AAPL",
+... "GOOG",
+... "MSFT",
+... ],
+... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
+... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
+... }
+>>> data_trades = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 38),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... ],
+... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+... "quantity": [75, 155, 100, 100, 100],
+... }
+>>> quotes_pd = pd.DataFrame(data_quotes)
+>>> trades_pd = pd.DataFrame(data_trades)
+>>> quotes_pl = pl.DataFrame(data_quotes).sort("datetime")
+>>> trades_pl = pl.DataFrame(data_trades).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns:
+>>> @nw.narwhalify
+... def join_asof_datetime_by_ticker(df, other_any):
+... return df.join_asof(other_any, on="datetime", by="ticker")
+
We can now pass either pandas or Polars to the function:
+>>> join_asof_datetime_by_ticker(trades_pd, quotes_pd)
+ datetime ticker price quantity bid ask
+0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96
+1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98
+2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93
+3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93
+4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN
+
>>> join_asof_datetime_by_ticker(trades_pl, quotes_pl)
+shape: (5, 6)
+┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐
+│ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │
+╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡
+│ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │
+│ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │
+└────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘
+
lazy()
+
+Lazify the DataFrame (if possible).
+If a library does not support lazy execution, then this is a no-op.
+ + +Returns:
+Type | +Description | +
---|---|
+ LazyFrame[Any]
+ |
+
+
+
+ A new LazyFrame. + |
+
Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def agnostic_lazy(df_native: IntoFrame) -> IntoFrame:
+... df = nw.from_native(df_native)
+... return df.lazy().to_native()
+
Note that pandas and PyArrow dataframes stay eager, whereas a Polars DataFrame becomes a Polars LazyFrame:
+>>> agnostic_lazy(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> agnostic_lazy(df_pl)
+<LazyFrame ...>
+>>> agnostic_lazy(df_pa)
+pyarrow.Table
+foo: int64
+bar: double
+ham: string
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+ham: [["a","b","c"]]
+
null_count()
+
+Create a new DataFrame that shows the null counts per column.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "foo": [1, None, 3],
+... "bar": [6, 7, None],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, None, 3],
+... "bar": [6, 7, None],
+... "ham": ["a", "b", "c"],
+... }
+... )
+
Let's define a dataframe-agnostic function that returns the null count of +each column:
+>>> @nw.narwhalify
+... def func(df):
+... return df.null_count()
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 1 0
+
>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ u32 ┆ u32 │
+╞═════╪═════╪═════╡
+│ 1 ┆ 1 ┆ 0 │
+└─────┴─────┴─────┘
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.pipe(
+... lambda _df: _df.select(
+... [x for x in _df.columns if len(x) == 1]
+... ).to_native()
+... )
+
We can then pass either pandas or Polars:
+>>> agnostic_pipe(df_pd)
+ a
+0 1
+1 2
+2 3
+>>> agnostic_pipe(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
pivot(on, *, index=None, values=None, aggregate_function=None, maintain_order=True, sort_columns=False, separator='_')
+
+Create a spreadsheet-style pivot table as a DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str]
+ |
+
+
+
+ Name of the column(s) whose values will be used as the header of the +output DataFrame. + |
+ + required + | +
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ One or multiple keys to group by. If None, all remaining columns not
+specified on |
+
+ None
+ |
+
+ values
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ The column(s) whose values are placed under the new pivoted columns. If None, all remaining columns not
+specified on |
+
+ None
+ |
+
+ aggregate_function
+ |
+
+ Literal['min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'] | None
+ |
+
+
+
+ Choose from: +- None: no aggregation takes place, will raise error if multiple values + are in group. +- A predefined aggregate function string, one of + {'min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'} + |
+
+ None
+ |
+
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Sort the grouped keys so that the output order is predictable. + |
+
+ True
+ |
+
+ sort_columns
+ |
+
+ bool
+ |
+
+
+
+ Sort the transposed columns by name. Default is by order of +discovery. + |
+
+ False
+ |
+
+ separator
+ |
+
+ str
+ |
+
+
+
+ Used as separator/delimiter in generated column names in case of
+multiple |
+
+ '_'
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "ix": [1, 1, 2, 2, 1, 2],
+... "col": ["a", "a", "a", "a", "b", "b"],
+... "foo": [0, 1, 2, 2, 7, 1],
+... "bar": [0, 2, 0, 0, 9, 4],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.pivot("col", index="ix", aggregate_function="sum")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ ix foo_a foo_b bar_a bar_b
+0 1 1 7 2 9
+1 2 4 1 0 4
+>>> func(df_pl)
+shape: (2, 5)
+┌─────┬───────┬───────┬───────┬───────┐
+│ ix ┆ foo_a ┆ foo_b ┆ bar_a ┆ bar_b │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
+╞═════╪═══════╪═══════╪═══════╪═══════╡
+│ 1 ┆ 1 ┆ 7 ┆ 2 ┆ 9 │
+│ 2 ┆ 4 ┆ 1 ┆ 0 ┆ 4 │
+└─────┴───────┴───────┴───────┴───────┘
+
rename(mapping)
+
+Rename column names.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mapping
+ |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.rename({"foo": "apple"})
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ apple bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
row(index)
+
+Get values at given row.
+Note
+You should NEVER use this method to iterate over a DataFrame; +if you require row-iteration you should strongly prefer use of iter_rows() instead.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ index
+ |
+
+ int
+ |
+
+
+
+ Row number. + |
+ + required + | +
cuDF doesn't support this method.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from narwhals.typing import IntoDataFrame
+>>> from typing import Any
+>>>
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a library-agnostic function to get the second row.
+>>> def agnostic_row(df_native: IntoDataFrame) -> tuple[Any, ...]:
+... df = nw.from_native(df_native)
+... return df.row(1)
+
We can then pass pandas / Polars / any other supported library:
+>>> agnostic_row(df_pd)
+(2, 5)
+>>> agnostic_row(df_pl)
+(2, 5)
+
rows(*, named=False)
+
+Returns all data in the DataFrame as a list of rows of python-native values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ named
+ |
+
+ bool
+ |
+
+
+
+ By default, each row is returned as a tuple of values given +in the same order as the frame columns. Setting named=True will +return rows of dictionaries instead. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df, *, named):
+... return df.rows(named=named)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> func(df_pd, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> func(df_pl, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> func(df_pl, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample from this DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
The results may not be consistent across libraries.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.sample(n=2, seed=123)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+3 4 y
+0 1 x
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ str │
+╞═════╪═════╡
+│ 2 ┆ y │
+│ 3 ┆ x │
+└─────┴─────┘
+
As you can see, by using the same seed, the result will be consistent within +the same backend, but not necessarily across different backends.
+ +select(*exprs, **named_exprs)
+
+Select columns from this DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we pass the name of a +column to select that column.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select("foo")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo
+0 1
+1 2
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(["foo", "bar"])
+>>> func(df_pd)
+ foo bar
+0 1 6
+1 2 7
+2 3 8
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("foo"), nw.col("bar") + 1)
+>>> func(df_pd)
+ foo bar
+0 1 7
+1 2 8
+2 3 9
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(threshold=nw.col("foo") * 2)
+>>> func(df_pd)
+ threshold
+0 2
+1 4
+2 6
+>>> func(df_pl)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False, nulls_last=False)
+
+Sort the dataframe by the given columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ by
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column name(s) to sort by. + |
+ + required + | +
+ *more_by
+ |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional arguments. + |
+
+ ()
+ |
+
+ descending
+ |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple columns, can be +specified per column by passing a sequence of booleans. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last. + |
+
+ False
+ |
+
Unlike Polars, it is not possible to specify a sequence of booleans for
+nulls_last
in order to control per-column behaviour. Instead a single
+boolean is applied for all by
columns.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which we sort by multiple +columns in different orders:
+>>> @nw.narwhalify
+... def func(df):
+... return df.sort("c", "a", descending=[False, True])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c
+0 1.0 6.0 a
+2 NaN 4.0 b
+1 2.0 5.0 c
+>>> func(df_pl)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
tail(n=5)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the first |
+
+ 5
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function that gets the last 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.tail(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+2 3 8 c
+3 4 9 d
+4 5 10 e
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 3 ┆ 8 ┆ c │
+│ 4 ┆ 9 ┆ d │
+│ 5 ┆ 10 ┆ e │
+└─────┴─────┴─────┘
+
to_arrow()
+
+Convert to arrow table.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2, 3], "bar": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that converts to arrow table:
+>>> @nw.narwhalify
+... def func(df):
+... return df.to_arrow()
+
>>> func(df_pd)
+pyarrow.Table
+foo: int64
+bar: string
+----
+foo: [[1,2,3]]
+bar: [["a","b","c"]]
+
>>> func(df_pl)
+pyarrow.Table
+foo: int64
+bar: large_string
+----
+foo: [[1,2,3]]
+bar: [["a","b","c"]]
+
to_dict(*, as_series=True)
+
+Convert DataFrame to a dictionary mapping column name to values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ as_series
+ |
+
+ bool
+ |
+
+
+
+ If set to true |
+
+ True
+ |
+
Returns:
+Type | +Description | +
---|---|
+ dict[str, Series[Any]] | dict[str, list[Any]]
+ |
+
+
+
+ A mapping from column name to values / Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>>
+>>> df = {
+... "A": [1, 2, 3, 4, 5],
+... "fruits": ["banana", "banana", "apple", "apple", "banana"],
+... "B": [5, 4, 3, 2, 1],
+... "animals": ["beetle", "fly", "beetle", "beetle", "beetle"],
+... "optional": [28, 300, None, 2, -30],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def agnostic_to_dict(
+... df_native: IntoDataFrame,
+... ) -> dict[str, list[int | str | float | None]]:
+... df = nw.from_native(df_native)
+... return df.to_dict(as_series=False)
+
We can then pass either pandas, Polars or PyArrow to agnostic_to_dict
:
>>> agnostic_to_dict(df_pd)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28.0, 300.0, nan, 2.0, -30.0]}
+>>> agnostic_to_dict(df_pl)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+>>> agnostic_to_dict(df_pa)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+
to_native()
+
+Convert Narwhals DataFrame to native one.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrameT
+ |
+
+
+
+ Object of class that user started with. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Calling to_native
on a Narwhals DataFrame returns the native object:
>>> nw.from_native(df_pd).to_native()
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> nw.from_native(df_pl).to_native()
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 7.0 ┆ b │
+│ 3 ┆ 8.0 ┆ c │
+└─────┴─────┴─────┘
+>>> nw.from_native(df_pa).to_native()
+pyarrow.Table
+foo: int64
+bar: double
+ham: string
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+ham: [["a","b","c"]]
+
to_numpy()
+
+Convert this DataFrame to a NumPy ndarray.
+ + +Returns:
+Type | +Description | +
---|---|
+ ndarray
+ |
+
+
+
+ A NumPy ndarray. + |
+
Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> import numpy as np
+>>> from narwhals.typing import IntoDataFrame
+>>>
+>>> df = {"foo": [1, 2, 3], "bar": [6.5, 7.0, 8.5], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def agnostic_to_numpy(df_native: IntoDataFrame) -> np.ndarray:
+... df = nw.from_native(df_native)
+... return df.to_numpy()
+
We can then pass either pandas, Polars or PyArrow to agnostic_to_numpy
:
>>> agnostic_to_numpy(df_pd)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+>>> agnostic_to_numpy(df_pl)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+>>> agnostic_to_numpy(df_pa)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+
to_pandas()
+
+Convert this DataFrame to a pandas DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrame
+ |
+
+
+
+ A pandas DataFrame. + |
+
Examples:
+Construct pandas, Polars (eager) and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>>
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def agnostic_to_pandas(df_native: IntoDataFrame) -> pd.DataFrame:
+... df = nw.from_native(df_native)
+... return df.to_pandas()
+
We can then pass any supported library such as pandas, Polars (eager), or PyArrow to agnostic_to_pandas
:
>>> agnostic_to_pandas(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> agnostic_to_pandas(df_pl)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> agnostic_to_pandas(df_pa)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+
unique(subset=None, *, keep='any', maintain_order=False)
+
+Drop duplicate rows from this dataframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows. + |
+
+ None
+ |
+
+ keep
+ |
+
+ Literal['any', 'first', 'last', 'none']
+ |
+
+
+
+ {'first', 'last', 'any', 'none'} +Which of the duplicate rows to keep. +
|
+
+ 'any'
+ |
+
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original DataFrame. This may be more
+expensive to compute. Setting this to |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.unique(["bar", "ham"])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 a b
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
unpivot(on=None, *, index=None, variable_name=None, value_name=None)
+
+Unpivot a DataFrame from wide to long format.
+Optionally leaves identifiers set.
+This function is useful to massage a DataFrame into a format where one or more +columns are identifier variables (index) while all other columns, considered +measured variables (on), are "unpivoted" to the row axis leaving just +two non-identifier columns, 'variable' and 'value'.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as values variables; if |
+
+ None
+ |
+
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as identifier variables. + |
+
+ None
+ |
+
+ variable_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
+ value_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
If you're coming from pandas, this is similar to pandas.DataFrame.melt
,
+but with index
replacing id_vars
and on
replacing value_vars
.
+In other frameworks, you might know this operation as pivot_longer
.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": ["x", "y", "z"],
+... "b": [1, 3, 5],
+... "c": [2, 4, 6],
+... }
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.unpivot(on=["b", "c"], index="a")
+
We can pass any supported library such as pandas, Polars or PyArrow to func
:
>>> func(pl.DataFrame(data))
+shape: (6, 3)
+┌─────┬──────────┬───────┐
+│ a ┆ variable ┆ value │
+│ --- ┆ --- ┆ --- │
+│ str ┆ str ┆ i64 │
+╞═════╪══════════╪═══════╡
+│ x ┆ b ┆ 1 │
+│ y ┆ b ┆ 3 │
+│ z ┆ b ┆ 5 │
+│ x ┆ c ┆ 2 │
+│ y ┆ c ┆ 4 │
+│ z ┆ c ┆ 6 │
+└─────┴──────────┴───────┘
+
>>> func(pd.DataFrame(data))
+ a variable value
+0 x b 1
+1 y b 3
+2 z b 5
+3 x c 2
+4 y c 4
+5 z c 6
+
>>> func(pa.table(data))
+pyarrow.Table
+a: string
+variable: string
+value: int64
+----
+a: [["x","y","z"],["x","y","z"]]
+variable: [["b","b","b"],["c","c","c"]]
+value: [[1,3,5],[2,4,6]]
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this DataFrame.
+Added columns will replace existing columns with the same name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
DataFrame |
+ Self
+ |
+
+
+
+ A new DataFrame with the columns added. + |
+
Creating a new DataFrame using this method does not create a new copy of +existing data.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we pass an expression +to add it as a new column:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns((nw.col("a") * 2).alias("a*2"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c a*2
+0 1 0.5 True 2
+1 2 4.0 True 4
+2 3 10.0 False 6
+3 4 13.0 True 8
+>>> func(df_pl)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ a*2 │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
with_row_index(name='index')
+
+Insert column which enumerates rows.
+ + +Examples:
+Construct pandas and Polars DataFrames:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_with_row_index(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_row_index().to_native()
+
We can then pass either pandas or Polars:
+>>> agnostic_with_row_index(df_pd)
+ index a b
+0 0 1 4
+1 1 2 5
+2 2 3 6
+>>> agnostic_with_row_index(df_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ index ┆ a ┆ b │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ i64 ┆ i64 │
+╞═══════╪═════╪═════╡
+│ 0 ┆ 1 ┆ 4 │
+│ 1 ┆ 2 ┆ 5 │
+│ 2 ┆ 3 ┆ 6 │
+└───────┴─────┴─────┘
+
write_csv(file=None)
+
+Write dataframe to comma-separated values (CSV) file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ file
+ |
+
+ str | Path | BytesIO | None
+ |
+
+
+
+ String, path object or file-like object to which the dataframe will be +written. If None, the resulting csv format is returned as a string. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ String or None. + |
+
Examples:
+Construct pandas, Polars (eager) and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def func(df):
+... df = nw.from_native(df)
+... return df.write_csv()
+
We can pass any supported library such as pandas, Polars or PyArrow to func
:
>>> func(df_pd)
+'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
+>>> func(df_pl)
+'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
+>>> func(df_pa)
+'"foo","bar","ham"\n1,6,"a"\n2,7,"b"\n3,8,"c"\n'
+
If we had passed a file name to write_csv
, it would have been
+written to that file.
write_parquet(file)
+
+Write dataframe to parquet file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ file
+ |
+
+ str | Path | BytesIO
+ |
+
+
+
+ String, path object or file-like object to which the dataframe will be +written. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ None
+ |
+
+
+
+ None. + |
+
Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def func(df):
+... df = nw.from_native(df)
+... df.write_parquet("foo.parquet")
+
We can then pass either pandas, Polars or PyArrow to func
:
>>> func(df_pd)
+>>> func(df_pl)
+>>> func(df_pa)
+
narwhals.dependencies
get_cudf()
+
+Get cudf module (if already imported - else return None).
+ +get_ibis()
+
+Get ibis module (if already imported - else return None).
+ +get_modin()
+
+Get modin.pandas module (if already imported - else return None).
+ +get_pandas()
+
+Get pandas module (if already imported - else return None).
+ +get_polars()
+
+Get Polars module (if already imported - else return None).
+ +get_pyarrow()
+
+Get pyarrow module (if already imported - else return None).
+ +is_cudf_dataframe(df)
+
+Check whether df
is a cudf DataFrame without importing cudf.
is_cudf_index(index)
+
+Check whether index
is a cudf Index without importing cudf.
is_cudf_series(ser)
+
+Check whether ser
is a cudf Series without importing cudf.
is_dask_dataframe(df)
+
+Check whether df
is a Dask DataFrame without importing Dask.
is_ibis_table(df)
+
+Check whether df
is an Ibis Table without importing Ibis.
is_into_dataframe(native_dataframe)
+
+Check whether native_dataframe
can be converted to a Narwhals DataFrame.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_dataframe
+ |
+
+ Any
+ |
+
+
+
+ The object to check. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+
|
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> from narwhals.dependencies import is_into_dataframe
+
>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])
+
>>> is_into_dataframe(df_pd)
+True
+>>> is_into_dataframe(df_pl)
+True
+>>> is_into_dataframe(np_arr)
+False
+
is_into_series(native_series)
+
+Check whether native_series
can be converted to a Narwhals Series.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_series
+ |
+
+ IntoSeries
+ |
+
+
+
+ The object to check. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+
|
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> import narwhals as nw
+
>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+>>> np_arr = np.array([1, 2, 3])
+
>>> nw.dependencies.is_into_series(s_pd)
+True
+>>> nw.dependencies.is_into_series(s_pl)
+True
+>>> nw.dependencies.is_into_series(np_arr)
+False
+
is_modin_dataframe(df)
+
+Check whether df
is a modin DataFrame without importing modin.
is_modin_index(index)
+
+Check whether index
is a modin Index without importing modin.
is_modin_series(ser)
+
+Check whether ser
is a modin Series without importing modin.
is_numpy_array(arr)
+
+Check whether arr
is a NumPy Array without importing NumPy.
is_pandas_dataframe(df)
+
+Check whether df
is a pandas DataFrame without importing pandas.
is_pandas_index(index)
+
+Check whether index
is a pandas Index without importing pandas.
is_pandas_like_dataframe(df)
+
+Check whether df
is a pandas-like DataFrame without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_like_index(index)
+
+Check whether index
is a pandas-like Index without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_like_series(ser)
+
+Check whether ser
is a pandas-like Series without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_series(ser)
+
+Check whether ser
is a pandas Series without importing pandas.
is_polars_dataframe(df)
+
+Check whether df
is a Polars DataFrame without importing Polars.
is_polars_lazyframe(df)
+
+Check whether df
is a Polars LazyFrame without importing Polars.
is_polars_series(ser)
+
+Check whether ser
is a Polars Series without importing Polars.
is_pyarrow_chunked_array(ser)
+
+Check whether ser
is a PyArrow ChunkedArray without importing PyArrow.
is_pyarrow_table(df)
+
+Check whether df
is a PyArrow Table without importing PyArrow.
narwhals.dtypes
Array
+
+
+Fixed length list type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ inner
+ |
+
+ DType | type[DType]
+ |
+
+
+
+ The datatype of the values within each array. + |
+ + required + | +
+ width
+ |
+
+ int | None
+ |
+
+
+
+ The length of each array. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [[1, 2], [3, 4], [5, 6]]
+>>> ser_pd = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int32(), 2)))
+>>> ser_pl = pl.Series(data, dtype=pl.Array(pl.Int32, 2))
+>>> ser_pa = pa.chunked_array([data], type=pa.list_(pa.int32(), 2))
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Array(Int32, 2)
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Array(Int32, 2)
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Array(Int32, 2)
+
Decimal
+
+
+Decimal type.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = pl.Series(["1.5"], dtype=pl.Decimal)
+>>> nw.from_native(s, series_only=True).dtype
+Decimal
+
List
+
+
+Variable length list type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [["narwhal", "orca"], ["beluga", "vaquita"]]
+>>> ser_pd = pd.Series(data, dtype=pd.ArrowDtype(pa.large_list(pa.large_string())))
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+List(String)
+>>> nw.from_native(ser_pl, series_only=True).dtype
+List(String)
+>>> nw.from_native(ser_pa, series_only=True).dtype
+List(String)
+
Int128
+
+
+128-bit signed integer type.
+ + + + + + + + + +Int64
+
+
+64-bit signed integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Int64
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Int64
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Int64
+
Int32
+
+
+32-bit signed integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.Int32).dtype
+
>>> func(ser_pd)
+Int32
+>>> func(ser_pl)
+Int32
+>>> func(ser_pa)
+Int32
+
Int16
+
+
+16-bit signed integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.Int16).dtype
+
>>> func(ser_pd)
+Int16
+>>> func(ser_pl)
+Int16
+>>> func(ser_pa)
+Int16
+
Int8
+
+
+8-bit signed integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.Int8).dtype
+
>>> func(ser_pd)
+Int8
+>>> func(ser_pl)
+Int8
+>>> func(ser_pa)
+Int8
+
UInt128
+
+
+128-bit unsigned integer type.
+ + + + + + + + + +UInt64
+
+
+64-bit unsigned integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.UInt64).dtype
+
>>> func(ser_pd)
+UInt64
+>>> func(ser_pl)
+UInt64
+>>> func(ser_pa)
+UInt64
+
UInt32
+
+
+32-bit unsigned integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.UInt32).dtype
+
>>> func(ser_pd)
+UInt32
+>>> func(ser_pl)
+UInt32
+>>> func(ser_pa)
+UInt32
+
UInt16
+
+
+16-bit unsigned integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.UInt16).dtype
+
>>> func(ser_pd)
+UInt16
+>>> func(ser_pl)
+UInt16
+>>> func(ser_pa)
+UInt16
+
UInt8
+
+
+8-bit unsigned integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.UInt8).dtype
+
>>> func(ser_pd)
+UInt8
+>>> func(ser_pl)
+UInt8
+>>> func(ser_pa)
+UInt8
+
Field
+
+
+Definition of a single field within a Struct
DataType.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The name of the field within its parent |
+ + required + | +
+ dtype
+ |
+
+ type[DType] | DType
+ |
+
+
+
+ The |
+ + required + | +
Float64
+
+
+64-bit floating point type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [0.001, 0.1, 0.01, 0.1]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Float64
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Float64
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Float64
+
Float32
+
+
+32-bit floating point type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [0.001, 0.1, 0.01, 0.1]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.Float32).dtype
+
>>> func(ser_pd)
+Float32
+>>> func(ser_pl)
+Float32
+>>> func(ser_pa)
+Float32
+
Boolean
+
+
+Boolean type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [True, False, False, True]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Boolean
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Boolean
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Boolean
+
Categorical
+
+
+A categorical encoding of a set of strings.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = ["beluga", "narwhal", "orca", "vaquita"]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).cast(nw.Categorical).dtype
+Categorical
+>>> nw.from_native(ser_pl, series_only=True).cast(nw.Categorical).dtype
+Categorical
+>>> nw.from_native(ser_pa, series_only=True).cast(nw.Categorical).dtype
+Categorical
+
Enum
+
+
+A fixed categorical encoding of a unique set of strings.
+Polars has an Enum data type, while pandas and PyArrow do not.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["beluga", "narwhal", "orca", "vaquita"]
+>>> ser_pl = pl.Series(data, dtype=pl.Enum(data))
+
>>> nw.from_native(ser_pl, series_only=True).dtype
+Enum
+
String
+
+
+UTF-8 encoded string type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = ["beluga", "narwhal", "orca", "vaquita"]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+String
+>>> nw.from_native(ser_pl, series_only=True).dtype
+String
+>>> nw.from_native(ser_pa, series_only=True).dtype
+String
+
Struct
+
+
+Struct composite type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ fields
+ |
+
+ Sequence[Field] | Mapping[str, DType | type[DType]]
+ |
+
+
+
+ The fields that make up the struct. Can be either a sequence of Field +objects or a mapping of column names to data types. + |
+ + required + | +
Examples:
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [{"a": 1, "b": ["narwhal", "beluga"]}, {"a": 2, "b": ["orca"]}]
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pl, series_only=True).dtype
+Struct({'a': Int64, 'b': List(String)})
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Struct({'a': Int64, 'b': List(String)})
+
to_schema()
+
+Return Struct dtype as a schema dict.
+ + +Returns:
+Type | +Description | +
---|---|
+ OrderedDict[str, DType | type[DType]]
+ |
+
+
+
+ Mapping from column name to dtype. + |
+
Date
+
+
+Data type representing a calendar date.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from datetime import date, timedelta
+>>> data = [date(2024, 12, 1) + timedelta(days=d) for d in range(4)]
+>>> ser_pd = pd.Series(data, dtype="date32[pyarrow]")
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Date
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Date
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Date
+
Datetime
+
+
+Data type representing a calendar date and time of day.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['us', 'ns', 'ms', 's']
+ |
+
+
+
+ Unit of time. Defaults to |
+
+ 'us'
+ |
+
+ time_zone
+ |
+
+ str | timezone | None
+ |
+
+
+
+ Time zone string, as defined in zoneinfo (to see valid strings run
+ |
+
+ None
+ |
+
Adapted from Polars implementation
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> import narwhals as nw
+>>> from datetime import datetime, timedelta
+>>> data = [datetime(2024, 12, 9) + timedelta(days=n) for n in range(5)]
+>>> ser_pd = (
+... pd.Series(data)
+... .dt.tz_localize("Africa/Accra")
+... .astype("datetime64[ms, Africa/Accra]")
+... )
+>>> ser_pl = (
+... pl.Series(data).cast(pl.Datetime("ms")).dt.replace_time_zone("Africa/Accra")
+... )
+>>> ser_pa = pc.assume_timezone(
+... pa.chunked_array([data], type=pa.timestamp("ms")), "Africa/Accra"
+... )
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Datetime(time_unit='ms', time_zone='Africa/Accra')
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Datetime(time_unit='ms', time_zone='Africa/Accra')
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Datetime(time_unit='ms', time_zone='Africa/Accra')
+
Duration
+
+
+Data type representing a time duration.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['us', 'ns', 'ms', 's']
+ |
+
+
+
+ Unit of time. Defaults to |
+
+ 'us'
+ |
+
Adapted from Polars implementation
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from datetime import timedelta
+>>> data = [timedelta(seconds=d) for d in range(1, 4)]
+>>> ser_pd = pd.Series(data).astype("timedelta64[ms]")
+>>> ser_pl = pl.Series(data).cast(pl.Duration("ms"))
+>>> ser_pa = pa.chunked_array([data], type=pa.duration("ms"))
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Duration(time_unit='ms')
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Duration(time_unit='ms')
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Duration(time_unit='ms')
+
Object
+
+
+Data type for wrapping arbitrary Python objects.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> class Foo: ...
+>>> ser_pd = pd.Series([Foo(), Foo()])
+>>> ser_pl = pl.Series([Foo(), Foo()])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Object
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Object
+
Unknown
+
+
+Type representing DataType values that could not be determined statically.
+ + +Examples:
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = pd.period_range("2000-01", periods=4, freq="M")
+>>> ser_pd = pd.Series(data)
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Unknown
+
narwhals.exceptions
ColumnNotFoundError
+
+
+Exception raised when column name isn't present.
+ + + + + + + + + +InvalidIntoExprError
+
+
+Exception raised when object can't be converted to expression.
+ + + + + + + + + +InvalidOperationError
+
+
+Exception raised during invalid operations.
+ + + + + + + +NarwhalsUnstableWarning
+
+
+Warning issued when a method or function is considered unstable in the stable API.
+ + + + + + + +narwhals.Expr
abs()
+
+Return absolute value of each element.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, -2], "b": [-3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").abs()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 1 3
+1 2 4
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2]]
+b: [[3,4]]
+
alias(name)
+
+Rename the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [4, 5]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select((nw.col("b") + 10).alias("c")).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ c
+0 14
+1 15
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌─────┐
+│ c │
+│ --- │
+│ i64 │
+╞═════╡
+│ 14 │
+│ 15 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+c: int64
+----
+c: [[14,15]]
+
all()
+
+Return whether all values in the column are True
.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pa = pa.table({"a": [True, False], "b": [True, True]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").all()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 False True
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌───────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪══════╡
+│ false ┆ true │
+└───────┴──────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false]]
+b: [[true]]
+
any()
+
+Return whether any of the values in the column are True
.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pa = pa.table({"a": [True, False], "b": [True, True]})
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").any()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 True True
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞══════╪══════╡
+│ true ┆ true │
+└──────┴──────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true]]
+b: [[true]]
+
arg_max()
+
+Returns the index of the maximum value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
+>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [150, 100]})
+>>> df_pa = pa.table({"a": [10, 20], "b": [150, 100]})
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_arg_max(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a", "b").arg_max().name.suffix("_arg_max")
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow:
+>>> agnostic_arg_max(df_pd)
+ a_arg_max b_arg_max
+0 1 0
+>>> agnostic_arg_max(df_pl)
+shape: (1, 2)
+┌───────────┬───────────┐
+│ a_arg_max ┆ b_arg_max │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═══════════╪═══════════╡
+│ 1 ┆ 0 │
+└───────────┴───────────┘
+>>> agnostic_arg_max(df_pa)
+pyarrow.Table
+a_arg_max: int64
+b_arg_max: int64
+----
+a_arg_max: [[1]]
+b_arg_max: [[0]]
+
arg_min()
+
+Returns the index of the minimum value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
+>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [150, 100]})
+>>> df_pa = pa.table({"a": [10, 20], "b": [150, 100]})
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_arg_min(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a", "b").arg_min().name.suffix("_arg_min")
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow:
+>>> agnostic_arg_min(df_pd)
+ a_arg_min b_arg_min
+0 0 1
+>>> agnostic_arg_min(df_pl)
+shape: (1, 2)
+┌───────────┬───────────┐
+│ a_arg_min ┆ b_arg_min │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═══════════╪═══════════╡
+│ 0 ┆ 1 │
+└───────────┴───────────┘
+>>> agnostic_arg_min(df_pa)
+pyarrow.Table
+a_arg_min: int64
+b_arg_min: int64
+----
+a_arg_min: [[0]]
+b_arg_min: [[1]]
+
arg_true()
+
+Find elements where boolean expression is True.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, None, None, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").is_null().arg_true()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+1 1
+2 2
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ u32 │
+╞═════╡
+│ 1 │
+│ 2 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1,2]]
+
cast(dtype)
+
+Redefine an object's data type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtype
+ |
+
+ DType | type[DType]
+ |
+
+
+
+ Data type that the object will be cast into. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> from datetime import date
+>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+>>> df_pa = pa.table({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ foo bar
+0 1.0 6
+1 2.0 7
+2 3.0 8
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ f32 ┆ u8 │
+╞═════╪═════╡
+│ 1.0 ┆ 6 │
+│ 2.0 ┆ 7 │
+│ 3.0 ┆ 8 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+foo: float
+bar: uint8
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+
count()
+
+Returns the number of non-null elements in the column.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
+>>> df_pa = pa.table({"a": [1, 2, 3], "b": [None, 4, 4]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().count()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 3 2
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 3 ┆ 2 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[3]]
+b: [[2]]
+
cum_count(*, reverse=False)
+
+Return the cumulative count of the non-null values in the column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": ["x", "k", None, "d"]}
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a").cum_count().alias("cum_count"),
+... nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(pd.DataFrame(data))
+ a cum_count cum_count_reverse
+0 x 1 3
+1 k 2 2
+2 None 2 1
+3 d 3 1
+
>>> my_library_agnostic_function(pl.DataFrame(data))
+shape: (4, 3)
+┌──────┬───────────┬───────────────────┐
+│ a ┆ cum_count ┆ cum_count_reverse │
+│ --- ┆ --- ┆ --- │
+│ str ┆ u32 ┆ u32 │
+╞══════╪═══════════╪═══════════════════╡
+│ x ┆ 1 ┆ 3 │
+│ k ┆ 2 ┆ 2 │
+│ null ┆ 2 ┆ 1 │
+│ d ┆ 3 ┆ 1 │
+└──────┴───────────┴───────────────────┘
+
>>> my_library_agnostic_function(pa.table(data))
+pyarrow.Table
+a: string
+cum_count: uint32
+cum_count_reverse: uint32
+----
+a: [["x","k",null,"d"]]
+cum_count: [[1,2,2,3]]
+cum_count_reverse: [[3,2,1,1]]
+
cum_max(*, reverse=False)
+
+Return the cumulative max of the non-null values in the column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 3, None, 2]}
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a").cum_max().alias("cum_max"),
+... nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(pd.DataFrame(data))
+ a cum_max cum_max_reverse
+0 1.0 1.0 3.0
+1 3.0 3.0 3.0
+2 NaN NaN NaN
+3 2.0 3.0 2.0
+
>>> my_library_agnostic_function(pl.DataFrame(data))
+shape: (4, 3)
+┌──────┬─────────┬─────────────────┐
+│ a ┆ cum_max ┆ cum_max_reverse │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞══════╪═════════╪═════════════════╡
+│ 1 ┆ 1 ┆ 3 │
+│ 3 ┆ 3 ┆ 3 │
+│ null ┆ null ┆ null │
+│ 2 ┆ 3 ┆ 2 │
+└──────┴─────────┴─────────────────┘
+
>>> my_library_agnostic_function(pa.table(data))
+pyarrow.Table
+a: int64
+cum_max: int64
+cum_max_reverse: int64
+----
+a: [[1,3,null,2]]
+cum_max: [[1,3,null,3]]
+cum_max_reverse: [[3,3,null,2]]
+
cum_min(*, reverse=False)
+
+Return the cumulative min of the non-null values in the column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [3, 1, None, 2]}
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a").cum_min().alias("cum_min"),
+... nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(pd.DataFrame(data))
+ a cum_min cum_min_reverse
+0 3.0 3.0 1.0
+1 1.0 1.0 1.0
+2 NaN NaN NaN
+3 2.0 1.0 2.0
+
>>> my_library_agnostic_function(pl.DataFrame(data))
+shape: (4, 3)
+┌──────┬─────────┬─────────────────┐
+│ a ┆ cum_min ┆ cum_min_reverse │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞══════╪═════════╪═════════════════╡
+│ 3 ┆ 3 ┆ 1 │
+│ 1 ┆ 1 ┆ 1 │
+│ null ┆ null ┆ null │
+│ 2 ┆ 1 ┆ 2 │
+└──────┴─────────┴─────────────────┘
+
>>> my_library_agnostic_function(pa.table(data))
+pyarrow.Table
+a: int64
+cum_min: int64
+cum_min_reverse: int64
+----
+a: [[3,1,null,2]]
+cum_min: [[3,1,null,1]]
+cum_min_reverse: [[1,1,null,2]]
+
cum_prod(*, reverse=False)
+
+Return the cumulative product of the non-null values in the column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 3, None, 2]}
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a").cum_prod().alias("cum_prod"),
+... nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(pd.DataFrame(data))
+ a cum_prod cum_prod_reverse
+0 1.0 1.0 6.0
+1 3.0 3.0 6.0
+2 NaN NaN NaN
+3 2.0 6.0 2.0
+
>>> my_library_agnostic_function(pl.DataFrame(data))
+shape: (4, 3)
+┌──────┬──────────┬──────────────────┐
+│ a ┆ cum_prod ┆ cum_prod_reverse │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞══════╪══════════╪══════════════════╡
+│ 1 ┆ 1 ┆ 6 │
+│ 3 ┆ 3 ┆ 6 │
+│ null ┆ null ┆ null │
+│ 2 ┆ 6 ┆ 2 │
+└──────┴──────────┴──────────────────┘
+
>>> my_library_agnostic_function(pa.table(data))
+pyarrow.Table
+a: int64
+cum_prod: int64
+cum_prod_reverse: int64
+----
+a: [[1,3,null,2]]
+cum_prod: [[1,3,null,6]]
+cum_prod_reverse: [[6,6,null,2]]
+
cum_sum(*, reverse=False)
+
+Return cumulative sum.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").cum_sum()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 1 2
+1 2 6
+2 5 10
+3 10 16
+4 15 22
+>>> my_library_agnostic_function(df_pl)
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 2 ┆ 6 │
+│ 5 ┆ 10 │
+│ 10 ┆ 16 │
+│ 15 ┆ 22 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,5,10,15]]
+b: [[2,6,10,16,22]]
+
diff()
+
+Returns the difference between each element and the previous one.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in an Int64 column, you could
+do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(a_diff=nw.col("a").diff()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a_diff
+0 NaN
+1 0.0
+2 2.0
+3 2.0
+4 0.0
+>>> my_library_agnostic_function(df_pl)
+shape: (5, 1)
+┌────────┐
+│ a_diff │
+│ --- │
+│ i64 │
+╞════════╡
+│ null │
+│ 0 │
+│ 2 │
+│ 2 │
+│ 0 │
+└────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a_diff: int64
+----
+a_diff: [[null,0,2,2,0]]
+
drop_nulls()
+
+Remove missing values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+>>> df_pa = pa.table({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").drop_nulls()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 2.0
+1 4.0
+3 3.0
+5 5.0
+>>> my_library_agnostic_function(df_pl) # nan != null for polars
+shape: (5, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 2.0 │
+│ 4.0 │
+│ NaN │
+│ 3.0 │
+│ 5.0 │
+└─────┘
+>>> my_library_agnostic_function(df_pa) # nan != null for pyarrow
+pyarrow.Table
+a: double
+----
+a: [[2,4,nan,3,5]]
+
ewm_mean(*, com=None, span=None, half_life=None, alpha=None, adjust=True, min_periods=1, ignore_nulls=False)
+
+Compute exponentially-weighted moving average.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ com
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of center of mass, \(\gamma\), with \(\alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0\) |
+
+ None
+ |
+
+ span
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of span, \(\theta\), with \(\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1\) |
+
+ None
+ |
+
+ half_life
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of half-life, \(\tau\), with \(\alpha = 1 - \exp \left\{ \frac{-\ln(2)}{\tau} \right\} \; \forall \; \tau > 0\) |
+
+ None
+ |
+
+ alpha
+ |
+
+ float | None
+ |
+
+
+
+ Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\). + |
+
+ None
+ |
+
+ adjust
+ |
+
+ bool
+ |
+
+
+
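+ Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings. When adjust=True (the default), the EW function is calculated using weights \(w_i = (1 - \alpha)^i\). When adjust=False, the EW function is calculated recursively by \(y_0 = x_0\) and \(y_t = (1 - \alpha) y_{t - 1} + \alpha x_t\). +
|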
+
+ True
+ |
+
+ min_periods
+ |
+
+ int
+ |
+
+
+
+ Minimum number of observations in window required to have a value, (otherwise result is null). + |
+
+ 1
+ |
+
+ ignore_nulls
+ |
+
+ bool
+ |
+
+
+
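+ Ignore missing values when calculating weights. When ignore_nulls=False (the default), weights are based on absolute positions: for example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, \text{None}, x_2]\) are \((1 - \alpha)^2\) and \(1\) if adjust=True, and \((1 - \alpha)^2\) and \(\alpha\) if adjust=False. When ignore_nulls=True, weights are based on relative positions: for the same input, the weights of \(x_0\) and \(x_2\) are \(1 - \alpha\) and \(1\) if adjust=True, and \(1 - \alpha\) and \(\alpha\) if adjust=False. +
|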
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").ewm_mean(com=1, ignore_nulls=False)
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 1.000000
+1 1.666667
+2 2.428571
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 1)
+┌──────────┐
+│ a │
+│ --- │
+│ f64 │
+╞══════════╡
+│ 1.0 │
+│ 1.666667 │
+│ 2.428571 │
+└──────────┘
+
fill_null(value=None, strategy=None, limit=None)
+
+Fill null values with given value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any | None
+ |
+
+
+
+ Value used to fill null values. + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['forward', 'backward'] | None
+ |
+
+
+
+ Strategy used to fill null values. + |
+
+ None
+ |
+
+ limit
+ |
+
+ int | None
+ |
+
+
+
+ Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [2, 4, None, None, 3, 5],
+... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [2, 4, None, None, 3, 5],
+... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+... }
+... )
+>>> df_pa = pa.table(
+... {
+... "a": [2, 4, None, None, 3, 5],
+... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(nw.col("a", "b").fill_null(0)).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 2.0 2.0
+1 4.0 4.0
+2 0.0 0.0
+3 0.0 0.0
+4 3.0 3.0
+5 5.0 5.0
+
>>> my_library_agnostic_function(df_pl) # nan != null for polars
+shape: (6, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 2.0 │
+│ 4 ┆ 4.0 │
+│ 0 ┆ NaN │
+│ 0 ┆ NaN │
+│ 3 ┆ 3.0 │
+│ 5 ┆ 5.0 │
+└─────┴─────┘
+
>>> my_library_agnostic_function(df_pa) # nan != null for pyarrow
+pyarrow.Table
+a: int64
+b: double
+----
+a: [[2,4,0,0,3,5]]
+b: [[2,4,nan,nan,3,5]]
+
Using a strategy:
+>>> def func_strategies(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a", "b")
+... .fill_null(strategy="forward", limit=1)
+... .name.suffix("_filled")
+... ).to_native()
+
>>> func_strategies(df_pd)
+ a b a_filled b_filled
+0 2.0 2.0 2.0 2.0
+1 4.0 4.0 4.0 4.0
+2 NaN NaN 4.0 4.0
+3 NaN NaN NaN NaN
+4 3.0 3.0 3.0 3.0
+5 5.0 5.0 5.0 5.0
+
>>> func_strategies(df_pl) # nan != null for polars
+shape: (6, 4)
+┌──────┬─────┬──────────┬──────────┐
+│ a ┆ b ┆ a_filled ┆ b_filled │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ i64 ┆ f64 │
+╞══════╪═════╪══════════╪══════════╡
+│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │
+│ 4 ┆ 4.0 ┆ 4 ┆ 4.0 │
+│ null ┆ NaN ┆ 4 ┆ NaN │
+│ null ┆ NaN ┆ null ┆ NaN │
+│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │
+│ 5 ┆ 5.0 ┆ 5 ┆ 5.0 │
+└──────┴─────┴──────────┴──────────┘
+
>>> func_strategies(df_pa) # nan != null for pyarrow
+pyarrow.Table
+a: int64
+b: double
+a_filled: int64
+b_filled: double
+----
+a: [[2,4,null,null,3,5]]
+b: [[2,4,nan,nan,3,5]]
+a_filled: [[2,4,4,null,3,5]]
+b_filled: [[2,4,nan,nan,3,5]]
+
filter(*predicates)
+
+Filters elements based on a condition, returning a new expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ predicates
+ |
+
+ Any
+ |
+
+
+
+ Conditions to filter by (which get ANDed together). + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
+>>> df_pl = pl.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
+>>> df_pa = pa.table({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").filter(nw.col("a") > 4),
+... nw.col("b").filter(nw.col("b") < 13),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+3 5 10
+4 6 11
+5 7 12
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 5 ┆ 10 │
+│ 6 ┆ 11 │
+│ 7 ┆ 12 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[5,6,7]]
+b: [[10,11,12]]
+
gather_every(n, offset=0)
+
+Take every nth value in the expression and return it as a new expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we gather every 2 rows, +starting from an offset of 1:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+1 2
+3 4
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 4 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,4]]
+
head(n=10)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": list(range(10))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that returns the first 3 rows:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").head(3)).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 0
+1 1
+2 2
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 0 │
+│ 1 │
+│ 2 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[0,1,2]]
+
clip(lower_bound=None, upper_bound=None)
+
+Clip the values of the expression to the given lower and/or upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any | None
+ |
+
+
+
+ Lower bound value. + |
+
+ None
+ |
+
+ upper_bound
+ |
+
+ Any | None
+ |
+
+
+
+ Upper bound value. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+
>>> s = [1, 2, 3]
+>>> df_pd = pd.DataFrame({"s": s})
+>>> df_pl = pl.DataFrame({"s": s})
+>>> df_pa = pa.table({"s": s})
+
We define a library agnostic function:
+>>> def func_lower(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("s").clip(2)).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func_lower
:
>>> func_lower(df_pd)
+ s
+0 2
+1 2
+2 3
+>>> func_lower(df_pl)
+shape: (3, 1)
+┌─────┐
+│ s │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> func_lower(df_pa)
+pyarrow.Table
+s: int64
+----
+s: [[2,2,3]]
+
We define another library agnostic function:
+>>> def func_upper(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("s").clip(upper_bound=2)).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func_upper
:
>>> func_upper(df_pd)
+ s
+0 1
+1 2
+2 2
+>>> func_upper(df_pl)
+shape: (3, 1)
+┌─────┐
+│ s │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 2 │
+└─────┘
+>>> func_upper(df_pa)
+pyarrow.Table
+s: int64
+----
+s: [[1,2,2]]
+
We can also set both bounds at the same time:
+>>> s = [-1, 1, -3, 3, -5, 5]
+>>> df_pd = pd.DataFrame({"s": s})
+>>> df_pl = pl.DataFrame({"s": s})
+>>> df_pa = pa.table({"s": s})
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("s").clip(-1, 3)).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ s
+0 -1
+1 1
+2 -1
+3 3
+4 -1
+5 3
+>>> my_library_agnostic_function(df_pl)
+shape: (6, 1)
+┌─────┐
+│ s │
+│ --- │
+│ i64 │
+╞═════╡
+│ -1 │
+│ 1 │
+│ -1 │
+│ 3 │
+│ -1 │
+│ 3 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+s: int64
+----
+s: [[-1,1,-1,3,-1,3]]
+
is_between(lower_bound, upper_bound, closed='both')
+
+Check if this expression is between the given lower and upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any
+ |
+
+
+
+ Lower bound value. + |
+ + required + | +
+ upper_bound
+ |
+
+ Any
+ |
+
+
+
+ Upper bound value. + |
+ + required + | +
+ closed
+ |
+
+ str
+ |
+
+
+
+ Define which sides of the interval are closed (inclusive). + |
+
+ 'both'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
+>>> df_pa = pa.table({"a": [1, 2, 3, 4, 5]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").is_between(2, 4, "right")).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 False
+1 False
+2 True
+3 True
+4 False
+>>> my_library_agnostic_function(df_pl)
+shape: (5, 1)
+┌───────┐
+│ a │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ false │
+│ true │
+│ true │
+│ false │
+└───────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+----
+a: [[false,false,true,true,false]]
+
is_duplicated()
+
+Return a boolean mask indicating duplicated values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().is_duplicated()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 True True
+1 False True
+2 False False
+3 True False
+>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ true ┆ true │
+│ false ┆ true │
+│ false ┆ false │
+│ true ┆ false │
+└───────┴───────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true,false,false,true]]
+b: [[true,true,false,false]]
+
is_finite()
+
+Returns boolean values indicating which original values are finite.
+ + +Different backends handle null values differently. is_finite
will return
+False for NaN and null values in the Dask and pandas non-nullable backends, while
+for Polars, PyArrow and pandas nullable backends null values are kept as such.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ Expression of Boolean data type. |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [float("nan"), float("inf"), 2.0, None]}
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").is_finite()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(pd.DataFrame(data))
+ a
+0 False
+1 False
+2 True
+3 False
+>>> my_library_agnostic_function(pl.DataFrame(data))
+shape: (4, 1)
+┌───────┐
+│ a │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ false │
+│ true │
+│ null │
+└───────┘
+
>>> my_library_agnostic_function(pa.table(data))
+pyarrow.Table
+a: bool
+----
+a: [[false,false,true,null]]
+
is_first_distinct()
+
+Return a boolean mask indicating the first occurrence of each distinct value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().is_first_distinct()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 True True
+1 True False
+2 True True
+3 False True
+>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ true ┆ true │
+│ true ┆ false │
+│ true ┆ true │
+│ false ┆ true │
+└───────┴───────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true,true,true,false]]
+b: [[true,false,true,true]]
+
is_in(other)
+
+Check if elements of this expression are present in the other iterable.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Any
+ |
+
+
+
+ iterable + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2, 9, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 9, 10]})
+>>> df_pa = pa.table({"a": [1, 2, 9, 10]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 1 True
+1 2 True
+2 9 False
+3 10 False
+
>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌─────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ bool │
+╞═════╪═══════╡
+│ 1 ┆ true │
+│ 2 ┆ true │
+│ 9 ┆ false │
+│ 10 ┆ false │
+└─────┴───────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: bool
+----
+a: [[1,2,9,10]]
+b: [[true,true,false,false]]
+
is_last_distinct()
+
+Return a boolean mask indicating the last occurrence of each distinct value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().is_last_distinct()).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 False False
+1 True True
+2 True True
+3 True True
+>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ false ┆ false │
+│ true ┆ true │
+│ true ┆ true │
+│ true ┆ true │
+└───────┴───────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false,true,true,true]]
+b: [[false,true,true,true]]
+
is_null()
+
+Returns a boolean expression indicating which values are null.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas, Polars and PyArrow handle null values differently. Polars and PyArrow +distinguish between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+>>> df_pl = pl.DataFrame(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+>>> df_pa = pa.table(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b a_is_null b_is_null
+0 2.0 2.0 False False
+1 4.0 4.0 False False
+2 NaN NaN True True
+3 3.0 3.0 False False
+4 5.0 5.0 False False
+
>>> my_library_agnostic_function(df_pl) # nan != null for polars
+shape: (5, 4)
+┌──────┬─────┬───────────┬───────────┐
+│ a ┆ b ┆ a_is_null ┆ b_is_null │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ bool │
+╞══════╪═════╪═══════════╪═══════════╡
+│ 2 ┆ 2.0 ┆ false ┆ false │
+│ 4 ┆ 4.0 ┆ false ┆ false │
+│ null ┆ NaN ┆ true ┆ false │
+│ 3 ┆ 3.0 ┆ false ┆ false │
+│ 5 ┆ 5.0 ┆ false ┆ false │
+└──────┴─────┴───────────┴───────────┘
+
>>> my_library_agnostic_function(df_pa) # nan != null for pyarrow
+pyarrow.Table
+a: int64
+b: double
+a_is_null: bool
+b_is_null: bool
+----
+a: [[2,4,null,3,5]]
+b: [[2,4,nan,3,5]]
+a_is_null: [[false,false,true,false,false]]
+b_is_null: [[false,false,false,false,false]]
+
is_unique()
+
+Return a boolean mask indicating unique values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().is_unique()).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 False False
+1 True False
+2 True True
+3 False True
+>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ false ┆ false │
+│ true ┆ false │
+│ true ┆ true │
+│ false ┆ true │
+└───────┴───────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false,true,true,false]]
+b: [[false,false,true,true]]
+
len()
+
+Return the number of elements in the column.
+Null values count towards the total.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that computes the length of column "a" after filtering on different values of the "b" column:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
+... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a1 a2
+0 2 1
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a1 ┆ a2 │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 2 ┆ 1 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a1: int64
+a2: int64
+----
+a1: [[2]]
+a2: [[1]]
+
map_batches(function, return_dtype=None)
+
+Apply a custom Python function to a whole Series or sequence of Series.
+The output of this custom function is presumed to be either a Series, +or a NumPy array (in which case it will be automatically converted into +a Series).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[Any], Self]
+ |
+
+
+
+ Function to apply to Series. + |
+ + required + | +
+ return_dtype
+ |
+
+ DType | None
+ |
+
+
+
+ Dtype of the output Series. +If not set, the dtype will be inferred based on the first non-null value +that is returned by the function. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a", "b").map_batches(
+... lambda s: s.to_numpy() + 1, return_dtype=nw.Float64
+... )
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 2.0 5.0
+1 3.0 6.0
+2 4.0 7.0
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 2.0 ┆ 5.0 │
+│ 3.0 ┆ 6.0 │
+│ 4.0 ┆ 7.0 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[2,3,4]]
+b: [[5,6,7]]
+
max()
+
+Returns the maximum value(s) from the given column(s).
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
+>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [50, 100]})
+>>> df_pa = pa.table({"a": [10, 20], "b": [50, 100]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.max("a", "b")).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 20 100
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 20 ┆ 100 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[20]]
+b: [[100]]
+
mean()
+
+Get mean value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
+>>> df_pl = pl.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
+>>> df_pa = pa.table({"a": [-1, 0, 1], "b": [2, 4, 6]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").mean()).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 0.0 4.0
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 0.0 ┆ 4.0 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[0]]
+b: [[4]]
+
median()
+
+Get median value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
+>>> df_pl = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
+>>> df_pa = pa.table({"a": [1, 8, 3], "b": [4, 5, 2]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").median()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 3.0 4.0
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 3.0 ┆ 4.0 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[3]]
+b: [[4]]
+
min()
+
+Returns the minimum value(s) from the given column(s).
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 3]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [4, 3]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.min("a", "b")).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 1 3
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1]]
+b: [[3]]
+
mode()
+
+Compute the most occurring value(s).
+Can return multiple values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+
>>> data = {
+... "a": [1, 1, 2, 3],
+... "b": [1, 1, 2, 2],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").mode()).sort("a").to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 1
+
>>> my_library_agnostic_function(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+└─────┘
+
>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1]]
+
null_count()
+
+Count null values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas, Polars and PyArrow handle null values differently. Polars and PyArrow +distinguish between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().null_count()).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 1 2
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1]]
+b: [[2]]
+
n_unique()
+
+Returns count of unique values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+>>> df_pa = pa.table({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").n_unique()).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 5 3
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 5 ┆ 3 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[5]]
+b: [[3]]
+
over(*keys)
+
+Compute expressions over the given groups.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of columns to compute window expression over.
+ Must be names of columns, as opposed to expressions -
+ so, this is a bit less flexible than Polars' |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_min_per_group=nw.col("a").min().over("b")
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b a_min_per_group
+0 1 1 1
+1 2 1 1
+2 3 2 3
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────────────────┐
+│ a ┆ b ┆ a_min_per_group │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪═════════════════╡
+│ 1 ┆ 1 ┆ 1 │
+│ 2 ┆ 1 ┆ 1 │
+│ 3 ┆ 2 ┆ 3 │
+└─────┴─────┴─────────────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+a_min_per_group: int64
+----
+a: [[1,2,3]]
+b: [[1,1,2]]
+a_min_per_group: [[1,1,3]]
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[Any], Self]
+ |
+
+
+
+ Function to apply. + |
+ + required + | +
+ args
+ |
+
+ Any
+ |
+
+
+
+ Positional arguments to pass to function. + |
+
+ ()
+ |
+
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Keyword arguments to pass to function. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a library-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 2
+1 3
+2 4
+3 5
+>>> my_library_agnostic_function(df_pl)
+shape: (4, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 3 │
+│ 4 │
+│ 5 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,3,4,5]]
+
quantile(quantile, interpolation)
+
+Get quantile value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ quantile
+ |
+
+ float
+ |
+
+
+
+ Quantile between 0.0 and 1.0. + |
+ + required + | +
+ interpolation
+ |
+
+ Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
+ |
+
+
+
+ Interpolation method. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a", "b").quantile(0.5, interpolation="linear")
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 24.5 74.5
+
>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪══════╡
+│ 24.5 ┆ 74.5 │
+└──────┴──────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[24.5]]
+b: [[74.5]]
+
replace_strict(old, new=None, *, return_dtype=None)
+
+Replace all values by different values.
+This function must replace all non-null input values (else it raises an error).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ old
+ |
+
+ Sequence[Any] | Mapping[Any, Any]
+ |
+
+
+
+ Sequence of values to replace. It also accepts a mapping of values to
+their replacement as syntactic sugar for
+ |
+ + required + | +
+ new
+ |
+
+ Sequence[Any] | None
+ |
+
+
+
+ Sequence of values to replace by. Length must match the length of |
+
+ None
+ |
+
+ return_dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ The data type of the resulting expression. If set to |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> df_pd = pd.DataFrame({"a": [3, 0, 1, 2]})
+>>> df_pl = pl.DataFrame({"a": [3, 0, 1, 2]})
+>>> df_pa = pa.table({"a": [3, 0, 1, 2]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... b=nw.col("a").replace_strict(
+... [0, 1, 2, 3],
+... ["zero", "one", "two", "three"],
+... return_dtype=nw.String,
+... )
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 3 three
+1 0 zero
+2 1 one
+3 2 two
+>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌─────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ str │
+╞═════╪═══════╡
+│ 3 ┆ three │
+│ 0 ┆ zero │
+│ 1 ┆ one │
+│ 2 ┆ two │
+└─────┴───────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: string
+----
+a: [[3,0,1,2]]
+b: [["three","zero","one","two"]]
+
rolling_mean(window_size, *, min_periods=None, center=False)
+
+Apply a rolling mean (moving mean) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their mean.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
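+computing a result. If set to `None` (default), it will be set equal to `window_size`. If provided, it must be a strictly positive integer, and less than or equal to `window_size`. |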
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1.0, 2.0, None, 4.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... b=nw.col("a").rolling_mean(window_size=3, min_periods=1)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_rolling_mean`:
>>> agnostic_rolling_mean(df_pd)
+ a b
+0 1.0 1.0
+1 2.0 1.5
+2 NaN 1.5
+3 4.0 3.0
+
>>> agnostic_rolling_mean(df_pl)
+shape: (4, 2)
+┌──────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪═════╡
+│ 1.0 ┆ 1.0 │
+│ 2.0 ┆ 1.5 │
+│ null ┆ 1.5 │
+│ 4.0 ┆ 3.0 │
+└──────┴─────┘
+
>>> agnostic_rolling_mean(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[1,2,null,4]]
+b: [[1,1.5,1.5,3]]
+
rolling_sum(window_size, *, min_periods=None, center=False)
+
+Apply a rolling sum (moving sum) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their sum.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
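+computing a result. If set to `None` (default), it will be set equal to `window_size`. If provided, it must be a strictly positive integer, and less than or equal to `window_size`. |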
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1.0, 2.0, None, 4.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... b=nw.col("a").rolling_sum(window_size=3, min_periods=1)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_rolling_sum`:
>>> agnostic_rolling_sum(df_pd)
+ a b
+0 1.0 1.0
+1 2.0 3.0
+2 NaN 3.0
+3 4.0 6.0
+
>>> agnostic_rolling_sum(df_pl)
+shape: (4, 2)
+┌──────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪═════╡
+│ 1.0 ┆ 1.0 │
+│ 2.0 ┆ 3.0 │
+│ null ┆ 3.0 │
+│ 4.0 ┆ 6.0 │
+└──────┴─────┘
+
>>> agnostic_rolling_sum(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[1,2,null,4]]
+b: [[1,3,3,6]]
+
round(decimals=0)
+
+Round underlying floating point data by decimals
digits.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ decimals
+ |
+
+ int
+ |
+
+
+
+ Number of decimals to round by. + |
+
+ 0
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
+pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and +4.5 to 4.0, etc..).
+Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..).
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1.12345, 2.56789, 3.901234]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that rounds to the first decimal:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").round(1)).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 1.1
+1 2.6
+2 3.9
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 1.1 │
+│ 2.6 │
+│ 3.9 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[1.1,2.6,3.9]]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample randomly from this expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3]})
+>>> df_pa = pa.table({"a": [1, 2, 3]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").sample(fraction=1.0, with_replacement=True)
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a
+2 3
+0 1
+2 3
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 3 │
+│ 3 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1,3,3]]
+
shift(n)
+
+Shift values by n
positions.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of positions to shift values by. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to shift
+and fill missing values with 0
in an Int64 column, you could
+do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(a_shift=nw.col("a").shift(n=1)).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a_shift
+0 NaN
+1 1.0
+2 1.0
+3 3.0
+4 5.0
+>>> my_library_agnostic_function(df_pl)
+shape: (5, 1)
+┌─────────┐
+│ a_shift │
+│ --- │
+│ i64 │
+╞═════════╡
+│ null │
+│ 1 │
+│ 1 │
+│ 3 │
+│ 5 │
+└─────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a_shift: int64
+----
+a_shift: [[null,1,1,3,5]]
+
sort(*, descending=False, nulls_last=False)
+
+Sort this column. By default, null values are placed first.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Sort in descending order. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last instead of first. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> df_pd = pd.DataFrame({"a": [5, None, 1, 2]})
+>>> df_pl = pl.DataFrame({"a": [5, None, 1, 2]})
+>>> df_pa = pa.table({"a": [5, None, 1, 2]})
+
Let's define dataframe-agnostic functions:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").sort()).to_native()
+
>>> def func_descend(df):
+... df = nw.from_native(df)
+... df = df.select(nw.col("a").sort(descending=True))
+... return nw.to_native(df)
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a
+1 NaN
+2 1.0
+3 2.0
+0 5.0
+>>> my_library_agnostic_function(df_pl)
+shape: (4, 1)
+┌──────┐
+│ a │
+│ --- │
+│ i64 │
+╞══════╡
+│ null │
+│ 1 │
+│ 2 │
+│ 5 │
+└──────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[null,1,2,5]]
+
>>> func_descend(df_pd)
+ a
+1 NaN
+0 5.0
+3 2.0
+2 1.0
+>>> func_descend(df_pl)
+shape: (4, 1)
+┌──────┐
+│ a │
+│ --- │
+│ i64 │
+╞══════╡
+│ null │
+│ 5 │
+│ 2 │
+│ 1 │
+└──────┘
+>>> func_descend(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[null,5,2,1]]
+
skew()
+
+Calculate the sample skewness of a column.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ An expression representing the sample skewness of the column. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
+>>> df_pa = pa.Table.from_pandas(df_pd)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").skew())
+
We can then pass pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 0.0 1.472427
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪══════════╡
+│ 0.0 ┆ 1.472427 │
+└─────┴──────────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[0]]
+b: [[1.4724267269058975]]
+
std(*, ddof=1)
+
+Get standard deviation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ ddof
+ |
+
+ int
+ |
+
+
+
+ "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, + where N represents the number of elements. By default ddof is 1. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+>>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+>>> df_pa = pa.table({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").std(ddof=0)).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 17.79513 1.265789
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌──────────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════════╪══════════╡
+│ 17.79513 ┆ 1.265789 │
+└──────────┴──────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[17.795130420052185]]
+b: [[1.2657891697365016]]
+
sum()
+
+Return the sum value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [5, 10], "b": [50, 100]})
+>>> df_pl = pl.DataFrame({"a": [5, 10], "b": [50, 100]})
+>>> df_pa = pa.table({"a": [5, 10], "b": [50, 100]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").sum()).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 15 150
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 15 ┆ 150 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[15]]
+b: [[150]]
+
tail(n=10)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": list(range(10))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that returns the last 3 rows:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").tail(3)).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+7 7
+8 8
+9 9
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 7 │
+│ 8 │
+│ 9 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[7,8,9]]
+
unique(*, maintain_order=False)
+
+Return unique values of this expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original expression. This may be more
+expensive to compute. Setting this to |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").unique(maintain_order=True)).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 1 2
+1 3 4
+2 5 6
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 3 ┆ 4 │
+│ 5 ┆ 6 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,3,5]]
+b: [[2,4,6]]
+
narwhals.Expr.cat
get_categories()
+
+Get unique categories from column.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+Let's create some dataframes:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"fruits": ["apple", "mango", "mango"]}
+>>> df_pd = pd.DataFrame(data, dtype="category")
+>>> df_pl = pl.DataFrame(data, schema={"fruits": pl.Categorical})
+
We define a dataframe-agnostic function to get unique categories +from column 'fruits':
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("fruits").cat.get_categories()).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ fruits
+0 apple
+1 mango
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌────────┐
+│ fruits │
+│ --- │
+│ str │
+╞════════╡
+│ apple │
+│ mango │
+└────────┘
+
narwhals.Expr.dt
convert_time_zone(time_zone)
+
+Convert to a new time zone.
+If converting from a time-zone-naive column, then conversion happens +as if converting from UTC.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime, timezone
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").dt.convert_time_zone("Asia/Kathmandu")
+... ).to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> my_library_agnostic_function(df_pd)
+ a
+0 2024-01-01 05:45:00+05:45
+1 2024-01-02 05:45:00+05:45
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌──────────────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs, Asia/Kathmandu] │
+╞══════════════════════════════╡
+│ 2024-01-01 05:45:00 +0545 │
+│ 2024-01-02 05:45:00 +0545 │
+└──────────────────────────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: timestamp[us, tz=Asia/Kathmandu]
+----
+a: [[2024-01-01 00:00:00.000000Z,2024-01-02 00:00:00.000000Z]]
+
date()
+
+Extract the date from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If the pandas default backend is being used. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]}
+>>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow")
+>>> df_pl = pl.DataFrame(data)
+
We define a library-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").dt.date()).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 2012-01-07
+1 2023-03-10
+
>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌────────────┐
+│ a │
+│ --- │
+│ date │
+╞════════════╡
+│ 2012-01-07 │
+│ 2023-03-10 │
+└────────────┘
+
day()
+
+Extract day from underlying DateTime representation.
+Returns the day of the month, starting from 1. The return value ranges from 1 to 31. (The last day of the month varies by month.)
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.year().alias("year"),
+... nw.col("datetime").dt.month().alias("month"),
+... nw.col("datetime").dt.day().alias("day"),
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ datetime year month day
+0 1978-06-01 1978 6 1
+1 2024-12-13 2024 12 13
+2 2065-01-01 2065 1 1
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 4)
+┌─────────────────────┬──────┬───────┬─────┐
+│ datetime ┆ year ┆ month ┆ day │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i32 ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪═══════╪═════╡
+│ 1978-06-01 00:00:00 ┆ 1978 ┆ 6 ┆ 1 │
+│ 2024-12-13 00:00:00 ┆ 2024 ┆ 12 ┆ 13 │
+│ 2065-01-01 00:00:00 ┆ 2065 ┆ 1 ┆ 1 │
+└─────────────────────┴──────┴───────┴─────┘
+
hour()
+
+Extract hour from underlying DateTime representation.
+Returns the hour number from 0 to 23.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1),
+... datetime(2024, 10, 13, 5),
+... datetime(2065, 1, 1, 10),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour")
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ datetime hour
+0 1978-01-01 01:00:00 1
+1 2024-10-13 05:00:00 5
+2 2065-01-01 10:00:00 10
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌─────────────────────┬──────┐
+│ datetime ┆ hour │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪══════╡
+│ 1978-01-01 01:00:00 ┆ 1 │
+│ 2024-10-13 05:00:00 ┆ 5 │
+│ 2065-01-01 10:00:00 ┆ 10 │
+└─────────────────────┴──────┘
+
microsecond()
+
+Extract microseconds from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 505000),
+... datetime(2065, 1, 1, 10, 20, 30, 67000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.microsecond().alias("microsecond"),
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ datetime hour minute second microsecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.505 5 30 14 505000
+2 2065-01-01 10:20:30.067 10 20 30 67000
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.505 ┆ 5 ┆ 30 ┆ 14 ┆ 505000 │
+│ 2065-01-01 10:20:30.067 ┆ 10 ┆ 20 ┆ 30 ┆ 67000 │
+└─────────────────────────┴──────┴────────┴────────┴─────────────┘
+
millisecond()
+
+Extract milliseconds from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 505000),
+... datetime(2065, 1, 1, 10, 20, 30, 67000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.millisecond().alias("millisecond"),
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ datetime hour minute second millisecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.505 5 30 14 505
+2 2065-01-01 10:20:30.067 10 20 30 67
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.505 ┆ 5 ┆ 30 ┆ 14 ┆ 505 │
+│ 2065-01-01 10:20:30.067 ┆ 10 ┆ 20 ┆ 30 ┆ 67 │
+└─────────────────────────┴──────┴────────┴────────┴─────────────┘
+
minute()
+
+Extract minutes from underlying DateTime representation.
+Returns the minute number from 0 to 59.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1),
+... datetime(2024, 10, 13, 5, 30),
+... datetime(2065, 1, 1, 10, 20),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ datetime hour minute
+0 1978-01-01 01:01:00 1 1
+1 2024-10-13 05:30:00 5 30
+2 2065-01-01 10:20:00 10 20
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 3)
+┌─────────────────────┬──────┬────────┐
+│ datetime ┆ hour ┆ minute │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪════════╡
+│ 1978-01-01 01:01:00 ┆ 1 ┆ 1 │
+│ 2024-10-13 05:30:00 ┆ 5 ┆ 30 │
+│ 2065-01-01 10:20:00 ┆ 10 ┆ 20 │
+└─────────────────────┴──────┴────────┘
+
month()
+
+Extract month from underlying DateTime representation.
+Returns the month number starting from 1. The return value ranges from 1 to 12.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.year().alias("year"),
+... nw.col("datetime").dt.month().alias("month"),
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ datetime year month
+0 1978-06-01 1978 6
+1 2024-12-13 2024 12
+2 2065-01-01 2065 1
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 3)
+┌─────────────────────┬──────┬───────┐
+│ datetime ┆ year ┆ month │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i32 ┆ i8 │
+╞═════════════════════╪══════╪═══════╡
+│ 1978-06-01 00:00:00 ┆ 1978 ┆ 6 │
+│ 2024-12-13 00:00:00 ┆ 2024 ┆ 12 │
+│ 2065-01-01 00:00:00 ┆ 2065 ┆ 1 │
+└─────────────────────┴──────┴───────┘
+
nanosecond()
+
+Extract nanoseconds from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 500000),
+... datetime(2065, 1, 1, 10, 20, 30, 60000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.nanosecond().alias("nanosecond"),
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ datetime hour minute second nanosecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.500 5 30 14 500000000
+2 2065-01-01 10:20:30.060 10 20 30 60000000
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
+│ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
+└─────────────────────────┴──────┴────────┴────────┴────────────┘
+
ordinal_day()
+
+Get ordinal day.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_ordinal_day=nw.col("a").dt.ordinal_day()
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a a_ordinal_day
+0 2020-01-01 1
+1 2020-08-03 216
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌─────────────────────┬───────────────┐
+│ a ┆ a_ordinal_day │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i16 │
+╞═════════════════════╪═══════════════╡
+│ 2020-01-01 00:00:00 ┆ 1 │
+│ 2020-08-03 00:00:00 ┆ 216 │
+└─────────────────────┴───────────────┘
+
replace_time_zone(time_zone)
+
+Replace time zone.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str | None
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime, timezone
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").dt.replace_time_zone("Asia/Kathmandu")
+... ).to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> my_library_agnostic_function(df_pd)
+ a
+0 2024-01-01 00:00:00+05:45
+1 2024-01-02 00:00:00+05:45
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌──────────────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs, Asia/Kathmandu] │
+╞══════════════════════════════╡
+│ 2024-01-01 00:00:00 +0545 │
+│ 2024-01-02 00:00:00 +0545 │
+└──────────────────────────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: timestamp[us, tz=Asia/Kathmandu]
+----
+a: [[2023-12-31 18:15:00.000000Z,2024-01-01 18:15:00.000000Z]]
+
second()
+
+Extract seconds from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1),
+... datetime(2024, 10, 13, 5, 30, 14),
+... datetime(2065, 1, 1, 10, 20, 30),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ datetime hour minute second
+0 1978-01-01 01:01:01 1 1 1
+1 2024-10-13 05:30:14 5 30 14
+2 2065-01-01 10:20:30 10 20 30
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 4)
+┌─────────────────────┬──────┬────────┬────────┐
+│ datetime ┆ hour ┆ minute ┆ second │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪════════╪════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 │
+│ 2024-10-13 05:30:14 ┆ 5 ┆ 30 ┆ 14 │
+│ 2065-01-01 10:20:30 ┆ 10 ┆ 20 ┆ 30 │
+└─────────────────────┴──────┴────────┴────────┘
+
timestamp(time_unit='us')
+
+Return a timestamp in the given time unit.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['ns', 'us', 'ms']
+ |
+
+
+
+ {'ns', 'us', 'ms'} +Time unit. + |
+
+ 'us'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import date
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"date": [date(2001, 1, 1), None, date(2001, 1, 3)]}
+>>> df_pd = pd.DataFrame(data, dtype="datetime64[ns]")
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("date").dt.timestamp().alias("timestamp_us"),
+... nw.col("date").dt.timestamp("ms").alias("timestamp_ms"),
+... ).to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> my_library_agnostic_function(df_pd)
+ date timestamp_us timestamp_ms
+0 2001-01-01 9.783072e+14 9.783072e+11
+1 NaT NaN NaN
+2 2001-01-03 9.784800e+14 9.784800e+11
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 3)
+┌────────────┬─────────────────┬──────────────┐
+│ date ┆ timestamp_us ┆ timestamp_ms │
+│ --- ┆ --- ┆ --- │
+│ date ┆ i64 ┆ i64 │
+╞════════════╪═════════════════╪══════════════╡
+│ 2001-01-01 ┆ 978307200000000 ┆ 978307200000 │
+│ null ┆ null ┆ null │
+│ 2001-01-03 ┆ 978480000000000 ┆ 978480000000 │
+└────────────┴─────────────────┴──────────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+date: date32[day]
+timestamp_us: int64
+timestamp_ms: int64
+----
+date: [[2001-01-01,null,2001-01-03]]
+timestamp_us: [[978307200000000,null,978480000000000]]
+timestamp_ms: [[978307200000,null,978480000000]]
+
total_microseconds()
+
+Get total microseconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total microseconds in the int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+Consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": [
+... timedelta(microseconds=10),
+... timedelta(milliseconds=1, microseconds=200),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_total_microseconds=nw.col("a").dt.total_microseconds()
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a a_total_microseconds
+0 0 days 00:00:00.000010 10
+1 0 days 00:00:00.001200 1200
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌──────────────┬──────────────────────┐
+│ a ┆ a_total_microseconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪══════════════════════╡
+│ 10µs ┆ 10 │
+│ 1200µs ┆ 1200 │
+└──────────────┴──────────────────────┘
+
total_milliseconds()
+
+Get total milliseconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total milliseconds in the int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+Consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": [
+... timedelta(milliseconds=10),
+... timedelta(milliseconds=20, microseconds=40),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_total_milliseconds=nw.col("a").dt.total_milliseconds()
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a a_total_milliseconds
+0 0 days 00:00:00.010000 10
+1 0 days 00:00:00.020040 20
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌──────────────┬──────────────────────┐
+│ a ┆ a_total_milliseconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪══════════════════════╡
+│ 10ms ┆ 10 │
+│ 20040µs ┆ 20 │
+└──────────────┴──────────────────────┘
+
total_minutes()
+
+Get total minutes.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total minutes in the int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+Consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_total_minutes=nw.col("a").dt.total_minutes()
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a a_total_minutes
+0 0 days 00:10:00 10
+1 0 days 00:20:40 20
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌──────────────┬─────────────────┐
+│ a ┆ a_total_minutes │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪═════════════════╡
+│ 10m ┆ 10 │
+│ 20m 40s ┆ 20 │
+└──────────────┴─────────────────┘
+
total_nanoseconds()
+
+Get total nanoseconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total nanoseconds in the int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+Consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"]
+>>> df_pd = pd.DataFrame({"a": pd.to_datetime(data)})
+>>> df_pl = pl.DataFrame({"a": data}).with_columns(
+... pl.col("a").str.to_datetime(time_unit="ns")
+... )
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_diff_total_nanoseconds=nw.col("a").diff().dt.total_nanoseconds()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ a a_diff_total_nanoseconds
+0 2024-01-01 00:00:00.000000001 NaN
+1 2024-01-01 00:00:00.000000002 1.0
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌───────────────────────────────┬──────────────────────────┐
+│ a ┆ a_diff_total_nanoseconds │
+│ --- ┆ --- │
+│ datetime[ns] ┆ i64 │
+╞═══════════════════════════════╪══════════════════════════╡
+│ 2024-01-01 00:00:00.000000001 ┆ null │
+│ 2024-01-01 00:00:00.000000002 ┆ 1 │
+└───────────────────────────────┴──────────────────────────┘
+
total_seconds()
+
+Get total seconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total seconds as an int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_total_seconds=nw.col("a").dt.total_seconds()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ a a_total_seconds
+0 0 days 00:00:10 10
+1 0 days 00:00:20.040000 20
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌──────────────┬─────────────────┐
+│ a ┆ a_total_seconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪═════════════════╡
+│ 10s ┆ 10 │
+│ 20s 40ms ┆ 20 │
+└──────────────┴─────────────────┘
+
to_string(format)
+
+Convert a Date/Time/Datetime column into a String column with the given format.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str
+ |
+
+
+
+ Format to format temporal column with. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Unfortunately, different libraries interpret format directives a bit +differently.
+- Chrono, the library used by Polars, uses "%.f"
 for fractional seconds,
+ whereas pandas and Python stdlib use ".%f".
+- PyArrow interprets "%S"
 as "seconds, including fractional seconds"
+ whereas most other tools interpret it as "just seconds, as 2 digits".
+Therefore, we make the following adjustments:
+- for pandas-like libraries, we replace "%S.%f"
 with "%S%.f".
+- for PyArrow, we replace "%S.%f"
 with "%S".
+Workarounds like these don't make us happy, and we try to avoid them as +much as possible, but here we feel like it's the best compromise.
+If you just want to format a date/datetime Series as a local datetime +string, and have it work as consistently as possible across libraries, +we suggest using:
+- "%Y-%m-%dT%H:%M:%S%.f"
 for datetimes
+- "%Y-%m-%d"
 for dates
+though note that, even then, different tools may return a different number +of trailing zeros. Nonetheless, this is probably consistent enough for +most applications.
+If you have an application where this is not enough, please open an issue +and let us know.
+Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = [
+... datetime(2020, 3, 1),
+... datetime(2020, 4, 1),
+... datetime(2020, 5, 1),
+... ]
+>>> df_pd = pd.DataFrame({"a": data})
+>>> df_pl = pl.DataFrame({"a": data})
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").dt.to_string("%Y/%m/%d %H:%M:%S")
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 2020/03/01 00:00:00
+1 2020/04/01 00:00:00
+2 2020/05/01 00:00:00
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ str │
+╞═════════════════════╡
+│ 2020/03/01 00:00:00 │
+│ 2020/04/01 00:00:00 │
+│ 2020/05/01 00:00:00 │
+└─────────────────────┘
+
year()
+
+Extract year from underlying DateTime representation.
+Returns the year number in the calendar date.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.year().alias("year")
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ datetime year
+0 1978-06-01 1978
+1 2024-12-13 2024
+2 2065-01-01 2065
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌─────────────────────┬──────┐
+│ datetime ┆ year │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i32 │
+╞═════════════════════╪══════╡
+│ 1978-06-01 00:00:00 ┆ 1978 │
+│ 2024-12-13 00:00:00 ┆ 2024 │
+│ 2065-01-01 00:00:00 ┆ 2065 │
+└─────────────────────┴──────┘
+
narwhals.Expr.list
len()
+
+Return the number of elements in each list.
+Null values count towards the total.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [[1, 2], [3, 4, None], None, []]}
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_list_len(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(a_len=nw.col("a").list.len()).to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> agnostic_list_len(
+... pd.DataFrame(data).astype({"a": pd.ArrowDtype(pa.list_(pa.int64()))})
+... )
+ a a_len
+0 [1. 2.] 2
+1 [ 3. 4. nan] 3
+2 <NA> <NA>
+3 [] 0
+
>>> agnostic_list_len(pl.DataFrame(data))
+shape: (4, 2)
+┌──────────────┬───────┐
+│ a ┆ a_len │
+│ --- ┆ --- │
+│ list[i64] ┆ u32 │
+╞══════════════╪═══════╡
+│ [1, 2] ┆ 2 │
+│ [3, 4, null] ┆ 3 │
+│ null ┆ null │
+│ [] ┆ 0 │
+└──────────────┴───────┘
+
>>> agnostic_list_len(pa.table(data))
+pyarrow.Table
+a: list<item: int64>
+ child 0, item: int64
+a_len: uint32
+----
+a: [[[1,2],[3,4,null],null,[]]]
+a_len: [[2,3,null,0]]
+
narwhals.Expr.name
keep()
+
+Keep the original root name of the expression.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("foo").alias("alias_for_foo").name.keep()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd).columns
+Index(['foo'], dtype='object')
+>>> my_library_agnostic_function(df_pl).columns
+['foo']
+
map(function)
+
+Rename the output of an expression by mapping a function over the root name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[str], str]
+ |
+
+
+
+ Function that maps a root name to a new name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> renaming_func = lambda s: s[::-1] # reverse column name
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.map(renaming_func)).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd).columns
+Index(['oof', 'RAB'], dtype='object')
+>>> my_library_agnostic_function(df_pl).columns
+['oof', 'RAB']
+
prefix(prefix)
+
+Add a prefix to the root column name of the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ Prefix to add to the root column name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def add_colname_prefix(df_native: IntoFrameT, prefix: str) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.prefix(prefix)).to_native()
+
We can then pass either pandas or Polars to add_colname_prefix
:
>>> add_colname_prefix(df_pd, "with_prefix_").columns
+Index(['with_prefix_foo', 'with_prefix_BAR'], dtype='object')
+
>>> add_colname_prefix(df_pl, "with_prefix_").columns
+['with_prefix_foo', 'with_prefix_BAR']
+
suffix(suffix)
+
+Add a suffix to the root column name of the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to add to the root column name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def add_colname_suffix(df_native: IntoFrameT, suffix: str) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.suffix(suffix)).to_native()
+
We can then pass either pandas or Polars to add_colname_suffix
:
>>> add_colname_suffix(df_pd, "_with_suffix").columns
+Index(['foo_with_suffix', 'BAR_with_suffix'], dtype='object')
+>>> add_colname_suffix(df_pl, "_with_suffix").columns
+['foo_with_suffix', 'BAR_with_suffix']
+
to_lowercase()
+
+Make the root column name lowercase.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def to_lower(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.to_lowercase()).to_native()
+
We can then pass either pandas or Polars to to_lower
:
>>> to_lower(df_pd).columns
+Index(['foo', 'bar'], dtype='object')
+>>> to_lower(df_pl).columns
+['foo', 'bar']
+
to_uppercase()
+
+Make the root column name uppercase.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def to_upper(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.to_uppercase()).to_native()
+
We can then pass either pandas or Polars to to_upper
:
>>> to_upper(df_pd).columns
+Index(['FOO', 'BAR'], dtype='object')
+>>> to_upper(df_pl).columns
+['FOO', 'BAR']
+
narwhals.Expr.str
contains(pattern, *, literal=False)
+
+Check if string contains a substring that matches a pattern.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A character sequence or valid regular expression pattern. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ If True, treats the pattern as a literal string. + If False, assumes the pattern is a regular expression. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... default_match=nw.col("pets").str.contains("parrot|Dove"),
+... case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove"),
+... literal_match=nw.col("pets").str.contains(
+... "parrot|Dove", literal=True
+... ),
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ pets default_match case_insensitive_match literal_match
+0 cat False False False
+1 dog False False False
+2 rabbit and parrot True True False
+3 dove False True False
+4 None None None None
+>>> my_library_agnostic_function(df_pl)
+shape: (5, 4)
+┌───────────────────┬───────────────┬────────────────────────┬───────────────┐
+│ pets ┆ default_match ┆ case_insensitive_match ┆ literal_match │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ str ┆ bool ┆ bool ┆ bool │
+╞═══════════════════╪═══════════════╪════════════════════════╪═══════════════╡
+│ cat ┆ false ┆ false ┆ false │
+│ dog ┆ false ┆ false ┆ false │
+│ rabbit and parrot ┆ true ┆ true ┆ false │
+│ dove ┆ false ┆ true ┆ false │
+│ null ┆ null ┆ null ┆ null │
+└───────────────────┴───────────────┴────────────────────────┴───────────────┘
+
ends_with(suffix)
+
+Check if string values end with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ suffix substring + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... has_suffix=nw.col("fruits").str.ends_with("ngo")
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ fruits has_suffix
+0 apple False
+1 mango True
+2 None None
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌────────┬────────────┐
+│ fruits ┆ has_suffix │
+│ --- ┆ --- │
+│ str ┆ bool │
+╞════════╪════════════╡
+│ apple ┆ false │
+│ mango ┆ true │
+│ null ┆ null │
+└────────┴────────────┘
+
head(n=5)
+
+Take the first n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is not supported. + |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... lyrics_head=nw.col("lyrics").str.head()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ lyrics lyrics_head
+0 Atatata Atata
+1 taata taata
+2 taatatata taata
+3 zukkyun zukky
+
>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_head │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ Atata │
+│ taata ┆ taata │
+│ taatatata ┆ taata │
+│ zukkyun ┆ zukky │
+└───────────┴─────────────┘
+
len_chars()
+
+Return the length of each string as the number of characters.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"words": ["foo", "Café", "345", "東京", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... words_len=nw.col("words").str.len_chars()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ words words_len
+0 foo 3.0
+1 Café 4.0
+2 345 3.0
+3 東京 2.0
+4 None NaN
+
>>> my_library_agnostic_function(df_pl)
+shape: (5, 2)
+┌───────┬───────────┐
+│ words ┆ words_len │
+│ --- ┆ --- │
+│ str ┆ u32 │
+╞═══════╪═══════════╡
+│ foo ┆ 3 │
+│ Café ┆ 4 │
+│ 345 ┆ 3 │
+│ 東京 ┆ 2 │
+│ null ┆ null │
+└───────┴───────────┘
+
replace(pattern, value, *, literal=False, n=1)
+
+Replace first matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string. + |
+
+ False
+ |
+
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of matches to replace. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"foo": ["123abc", "abc abc123"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... df = df.with_columns(replaced=nw.col("foo").str.replace("abc", ""))
+... return df.to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ foo replaced
+0 123abc 123
+1 abc abc123 abc123
+
>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌────────────┬──────────┐
+│ foo ┆ replaced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════════╪══════════╡
+│ 123abc ┆ 123 │
+│ abc abc123 ┆ abc123 │
+└────────────┴──────────┘
+
replace_all(pattern, value, *, literal=False)
+
+Replace all matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"foo": ["123abc", "abc abc123"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... df = df.with_columns(replaced=nw.col("foo").str.replace_all("abc", ""))
+... return df.to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ foo replaced
+0 123abc 123
+1 abc abc123 123
+
>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌────────────┬──────────┐
+│ foo ┆ replaced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════════╪══════════╡
+│ 123abc ┆ 123 │
+│ abc abc123 ┆ 123 │
+└────────────┴──────────┘
+
slice(offset, length=None)
+
+Create subslices of the string values of an expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ offset
+ |
+
+ int
+ |
+
+
+
+ Start index. Negative indexing is supported. + |
+ + required + | +
+ length
+ |
+
+ int | None
+ |
+
+
+
+ Length of the slice. If set to None (default), the slice is taken to the end of the string. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"s": ["pear", None, "papaya", "dragonfruit"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... s_sliced=nw.col("s").str.slice(4, length=3)
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ s s_sliced
+0 pear
+1 None None
+2 papaya ya
+3 dragonfruit onf
+
>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌─────────────┬──────────┐
+│ s ┆ s_sliced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═════════════╪══════════╡
+│ pear ┆ │
+│ null ┆ null │
+│ papaya ┆ ya │
+│ dragonfruit ┆ onf │
+└─────────────┴──────────┘
+
Using negative indexes:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(s_sliced=nw.col("s").str.slice(-3)).to_native()
+
>>> my_library_agnostic_function(df_pd)
+ s s_sliced
+0 pear ear
+1 None None
+2 papaya aya
+3 dragonfruit uit
+
>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌─────────────┬──────────┐
+│ s ┆ s_sliced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═════════════╪══════════╡
+│ pear ┆ ear │
+│ null ┆ null │
+│ papaya ┆ aya │
+│ dragonfruit ┆ uit │
+└─────────────┴──────────┘
+
starts_with(prefix)
+
+Check if string values start with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ prefix substring + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... has_prefix=nw.col("fruits").str.starts_with("app")
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ fruits has_prefix
+0 apple True
+1 mango False
+2 None None
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌────────┬────────────┐
+│ fruits ┆ has_prefix │
+│ --- ┆ --- │
+│ str ┆ bool │
+╞════════╪════════════╡
+│ apple ┆ true │
+│ mango ┆ false │
+│ null ┆ null │
+└────────┴────────────┘
+
strip_chars(characters=None)
+
+Remove leading and trailing characters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ characters
+ |
+
+ str | None
+ |
+
+
+
+ The set of characters to be removed. All combinations of this set of characters will be stripped from the start and end of the string. If set to None (default), all leading and trailing whitespace is removed instead. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> from typing import Any
+>>> data = {"fruits": ["apple", "\nmango"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrame) -> dict[str, Any]:
+... df = nw.from_native(df_native)
+... df = df.with_columns(stripped=nw.col("fruits").str.strip_chars())
+... return df.to_dict(as_series=False)
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+{'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']}
+
>>> my_library_agnostic_function(df_pl)
+{'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']}
+
tail(n=5)
+
+Take the last n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is not supported. + |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... lyrics_tail=nw.col("lyrics").str.tail()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ lyrics lyrics_tail
+0 Atatata atata
+1 taata taata
+2 taatatata atata
+3 zukkyun kkyun
+
>>> my_library_agnostic_function(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_tail │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ atata │
+│ taata ┆ taata │
+│ taatatata ┆ atata │
+│ zukkyun ┆ kkyun │
+└───────────┴─────────────┘
+
to_datetime(format=None)
+
+Convert to Datetime dtype.
+ + +As different backends auto-infer format in different ways, if format=None
+there is no guarantee that the result will be equal.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str | None
+ |
+
+
+
+ Format to use for conversion. If set to None (default), the format is +inferred from the data. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
pandas defaults to nanosecond time unit, Polars to microsecond. +Prior to pandas 2.0, nanoseconds were the only time unit supported +in pandas, with no ability to set any other one. Support for +setting the time unit in pandas, where the installed version permits it, is planned.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = ["2020-01-01", "2020-01-02"]
+>>> df_pd = pd.DataFrame({"a": data})
+>>> df_pl = pl.DataFrame({"a": data})
+>>> df_pa = pa.table({"a": data})
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").str.to_datetime(format="%Y-%m-%d")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow:
+>>> my_library_agnostic_function(df_pd)
+ a
+0 2020-01-01
+1 2020-01-02
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs] │
+╞═════════════════════╡
+│ 2020-01-01 00:00:00 │
+│ 2020-01-02 00:00:00 │
+└─────────────────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: timestamp[us]
+----
+a: [[2020-01-01 00:00:00.000000,2020-01-02 00:00:00.000000]]
+
to_lowercase()
+
+Transform string to lowercase variant.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"fruits": ["APPLE", "MANGO", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... lower_col=nw.col("fruits").str.to_lowercase()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ fruits lower_col
+0 APPLE apple
+1 MANGO mango
+2 None None
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ lower_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ APPLE ┆ apple │
+│ MANGO ┆ mango │
+│ null ┆ null │
+└────────┴───────────┘
+
to_uppercase()
+
+Transform string to uppercase variant.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. +For more info see the related issue. +There may be other unicode-edge-case-related variations across implementations.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... upper_col=nw.col("fruits").str.to_uppercase()
+... ).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ fruits upper_col
+0 apple APPLE
+1 mango MANGO
+2 None None
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ upper_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ apple ┆ APPLE │
+│ mango ┆ MANGO │
+│ null ┆ null │
+└────────┴───────────┘
+
narwhals.GroupBy
agg(*aggs, **named_aggs)
+
+Compute aggregations for each group of a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ aggs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Aggregations to compute for each group of the group by operation, +specified as positional arguments. + |
+
+ ()
+ |
+
+ named_aggs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional aggregations, specified as keyword arguments. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrameT
+ |
+
+
+
+ A new Dataframe. + |
+
Examples:
+Group by one column or by multiple columns and call agg
to compute
+the grouped sum of another column.
>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+
We define library agnostic functions:
+>>> @nw.narwhalify
+... def func(df):
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
+
>>> @nw.narwhalify
+... def func_mult_col(df):
+... return df.group_by("a", "b").agg(nw.sum("c")).sort("a", "b")
+
We can then pass either pandas or Polars to func
and func_mult_col
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> func_mult_col(df_pd)
+ a b c
+0 a 1 8
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func_mult_col(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 8 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
narwhals.LazyGroupBy
agg(*aggs, **named_aggs)
+
+Compute aggregations for each group of a group by operation.
+If a library does not support lazy execution, then this is a no-op.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ aggs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Aggregations to compute for each group of the group by operation, +specified as positional arguments. + |
+
+ ()
+ |
+
+ named_aggs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional aggregations, specified as keyword arguments. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyFrameT
+ |
+
+
+
+ A new LazyFrame. + |
+
Examples:
+Group by one column or by multiple columns and call agg
to compute
+the grouped sum of another column.
>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+
We define library agnostic functions:
+>>> def agnostic_func_one_col(lf_native: IntoFrameT) -> IntoFrameT:
+... lf = nw.from_native(lf_native)
+... return nw.to_native(lf.group_by("a").agg(nw.col("b").sum()).sort("a"))
+
>>> def agnostic_func_mult_col(lf_native: IntoFrameT) -> IntoFrameT:
+... lf = nw.from_native(lf_native)
+... return nw.to_native(lf.group_by("a", "b").agg(nw.sum("c")).sort("a", "b"))
+
We can then pass a lazy frame and materialise it with collect
:
>>> agnostic_func_one_col(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> agnostic_func_mult_col(lf_pl).collect()
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 8 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
narwhals.LazyFrame
Narwhals LazyFrame, backed by a native dataframe.
+The native dataframe might be pandas.DataFrame, polars.LazyFrame, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
.
columns: list[str]
+
+
+ property
+
+
+Get column names.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
We define a library agnostic function:
+>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.columns
+
We can then pass either pandas or Polars to agnostic_columns
:
>>> agnostic_columns(df_pd)
+['foo', 'bar', 'ham']
+>>> agnostic_columns(lf_pl)
+['foo', 'bar', 'ham']
+
implementation: Implementation
+
+
+ property
+
+
+Return implementation of native frame.
+This can be useful when you need to do some special-casing for +some libraries for features outside of Narwhals' scope - for +example, when dealing with pandas' Period Dtype.
+ + +Returns:
+Type | +Description | +
---|---|
+ Implementation
+ |
+
+
+
+ Implementation. + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> lf_native = pl.LazyFrame({"a": [1, 2, 3]})
+>>> lf = nw.from_native(lf_native)
+>>> lf.implementation
+<Implementation.POLARS: 6>
+>>> lf.implementation.is_pandas()
+False
+>>> lf.implementation.is_polars()
+True
+
schema: Schema
+
+
+ property
+
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf.schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
clone()
+
+Create a copy of this LazyFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we copy the DataFrame:
+>>> def agnostic_clone(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.clone().to_native()
+
>>> agnostic_clone(df_pd)
+ a b
+0 1 3
+1 2 4
+
>>> agnostic_clone(lf_pl).collect()
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
collect()
+
+Materialize this LazyFrame into a DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ DataFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": ["a", "b", "a", "b", "b", "c"],
+... "b": [1, 2, 3, 4, 5, 6],
+... "c": [6, 5, 4, 3, 2, 1],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf
+┌───────────────────────────────────────┐
+| Narwhals LazyFrame |
+| Use `.to_native` to see native output |
+└───────────────────────────────────────┘
+>>> df = lf.group_by("a").agg(nw.all().sum()).collect()
+>>> df.to_native().sort("a")
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 4 ┆ 10 │
+│ b ┆ 11 ┆ 10 │
+│ c ┆ 6 ┆ 1 │
+└─────┴─────┴─────┘
+
collect_schema()
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf.collect_schema()
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
drop(*columns, strict=True)
+
+Remove columns from the LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *columns
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
+ strict
+ |
+
+ bool
+ |
+
+
+
+ Validate that all column names exist in the schema and throw an +exception if a column name does not exist in the schema. + |
+
+ True
+ |
+
strict
argument is ignored for polars<1.0.0
.
Please consider upgrading to a newer version or switching to eager mode.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.drop("ham").to_native()
+
We can then pass either pandas or Polars to agnostic_drop
:
>>> agnostic_drop(df_pd)
+ foo bar
+0 1 6.0
+1 2 7.0
+2 3 8.0
+>>> agnostic_drop(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Use positional arguments to drop multiple columns.
+>>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.drop("foo", "ham").to_native()
+
>>> agnostic_drop(df_pd)
+ bar
+0 6.0
+1 7.0
+2 8.0
+>>> agnostic_drop(lf_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
drop_nulls(subset=None)
+
+Drop null values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) for which null values are considered. If set to None +(default), use all columns. + |
+
+ None
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.drop_nulls().to_native()
+
We can then pass either pandas or Polars:
+>>> agnostic_drop_nulls(df_pd)
+ a ba
+0 1.0 1.0
+>>> agnostic_drop_nulls(lf_pl).collect()
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ ba │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 1.0 ┆ 1.0 │
+└─────┴─────┘
+
filter(*predicates, **constraints)
+
+Filter the rows in the LazyFrame based on a predicate expression.
+The original order of the remaining rows is preserved.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr] | list[bool]
+ |
+
+
+
+ Expression that evaluates to a boolean Series. Can +also be a (single!) boolean list. + |
+
+ ()
+ |
+
+ **constraints
+ |
+
+ Any
+ |
+
+
+
+ Column filters; use |
+
+ {}
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we filter on +one condition.
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter(nw.col("foo") > 1).to_native()
+
We can then pass either pandas or Polars to agnostic_filter
:
>>> agnostic_filter(df_pd)
+ foo bar ham
+1 2 7 b
+2 3 8 c
+>>> agnostic_filter(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_pl).collect()
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions:
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")).to_native()
+>>>
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter(
+... nw.col("foo") == 1,
+... nw.col("ham") == "a",
+... ).to_native()
+>>>
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Filter on an OR condition:
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter(
+... (nw.col("foo") == 1) | (nw.col("ham") == "c")
+... ).to_native()
+>>>
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+2 3 8 c
+>>> agnostic_filter(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_pl).collect()
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Provide multiple filters using **kwargs
syntax:
>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter(foo=2, ham="b").to_native()
+>>>
+>>> agnostic_filter(df_pd)
+ foo bar ham
+1 2 7 b
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+└─────┴─────┴─────┘
+
gather_every(n, offset=0)
+
+Take every nth row in the LazyFrame and return it as a new LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we gather every 2 rows, +starting from an offset of 1:
+>>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.gather_every(n=2, offset=1).to_native()
+
>>> agnostic_gather_every(df_pd)
+ a b
+1 2 6
+3 4 8
+
>>> agnostic_gather_every(lf_pl).collect()
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 6 │
+│ 4 ┆ 8 │
+└─────┴─────┘
+
group_by(*keys, drop_null_keys=False)
+
+Start a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts expression input. Strings are +parsed as column names. + |
+
+ ()
+ |
+
+ drop_null_keys
+ |
+
+ bool
+ |
+
+
+
+ if True, then groups where any key is null won't be +included in the result. + |
+
+ False
+ |
+
Examples:
+Group by one column and call agg
to compute the grouped sum of
+another column.
>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> df = {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we group by one column
+and call agg
to compute the grouped sum of another column.
>>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a").to_native()
+
We can then pass either pandas or Polars to agnostic_group_by_agg
:
>>> agnostic_group_by_agg(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> agnostic_group_by_agg(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> agnostic_group_by_agg(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... df.group_by(["a", "b"]).agg(nw.max("c")).sort(["a", "b"]).to_native()
+... )
+>>>
+>>> agnostic_group_by_agg(df_pd)
+ a b c
+0 a 1 5
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> agnostic_group_by_agg(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+>>> agnostic_group_by_agg(lf_pl).collect()
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n=5)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 5
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function that gets the first 3 rows.
+>>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.head(3).to_native()
+
We can then pass either pandas or Polars to agnostic_head
:
>>> agnostic_head(df_pd)
+ a b
+0 1 7
+1 2 8
+2 3 9
+>>> agnostic_head(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> agnostic_head(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
join(other, on=None, how='inner', *, left_on=None, right_on=None, suffix='_right')
+
+Add a join operation to the Logical Plan.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ Lazy DataFrame to join with. + |
+ + required + | +
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Name(s) of the join columns in both DataFrames. If set, |
+
+ None
+ |
+
+ how
+ |
+
+ Literal['inner', 'left', 'cross', 'semi', 'anti']
+ |
+
+
+
+ Join strategy. +
|
+
+ 'inner'
+ |
+
+ left_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the left DataFrame. + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the right DataFrame. + |
+
+ None
+ |
+
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to append to columns with a duplicate name. + |
+
+ '_right'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined LazyFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> data_other = {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+
>>> df_pd = pd.DataFrame(data)
+>>> other_pd = pd.DataFrame(data_other)
+
>>> lf_pl = pl.LazyFrame(data)
+>>> other_pl = pl.LazyFrame(data_other)
+
Let's define a dataframe-agnostic function in which we join over "ham" column:
+>>> def agnostic_join_on_ham(
+... df_native: IntoFrameT,
+... other_native: IntoFrameT,
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return df.join(other, left_on="ham", right_on="ham").to_native()
+
We can now pass either pandas or Polars to the function:
+>>> agnostic_join_on_ham(df_pd, other_pd)
+ foo bar ham apple
+0 1 6.0 a x
+1 2 7.0 b y
+
>>> agnostic_join_on_ham(lf_pl, other_pl).collect()
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
join_asof(other, *, left_on=None, right_on=None, on=None, by_left=None, by_right=None, by=None, strategy='backward')
+
+Perform an asof join.
+This is similar to a left-join except that we match on nearest key rather than equal keys.
+Both DataFrames must be sorted by the asof_join key.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
+ left_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+
+ None
+ |
+
+ on
+ |
+
+ str | None
+ |
+
+
+
+ Join column of both DataFrames. If set, left_on and right_on should be None. + |
+
+ None
+ |
+
+ by_left
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ by_right
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ by
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['backward', 'forward', 'nearest']
+ |
+
+
+
+ Join strategy. The default is "backward". +
|
+
+ 'backward'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from typing import Literal
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data_gdp = {
+... "datetime": [
+... datetime(2016, 1, 1),
+... datetime(2017, 1, 1),
+... datetime(2018, 1, 1),
+... datetime(2019, 1, 1),
+... datetime(2020, 1, 1),
+... ],
+... "gdp": [4164, 4411, 4566, 4696, 4827],
+... }
+>>> data_population = {
+... "datetime": [
+... datetime(2016, 3, 1),
+... datetime(2018, 8, 1),
+... datetime(2019, 1, 1),
+... ],
+... "population": [82.19, 82.66, 83.12],
+... }
+>>> gdp_pd = pd.DataFrame(data_gdp)
+>>> population_pd = pd.DataFrame(data_population)
+>>> gdp_pl = pl.LazyFrame(data_gdp).sort("datetime")
+>>> population_pl = pl.LazyFrame(data_population).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" column:
+>>> def agnostic_join_asof_datetime(
+... df_native: IntoFrameT,
+... other_native: IntoFrameT,
+... strategy: Literal["backward", "forward", "nearest"],
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return df.join_asof(other, on="datetime", strategy=strategy).to_native()
+
We can now pass either pandas or Polars to the function:
+>>> agnostic_join_asof_datetime(population_pd, gdp_pd, strategy="backward")
+ datetime population gdp
+0 2016-03-01 82.19 4164
+1 2018-08-01 82.66 4566
+2 2019-01-01 83.12 4696
+
>>> agnostic_join_asof_datetime(
+... population_pl, gdp_pl, strategy="backward"
+... ).collect()
+shape: (3, 3)
+┌─────────────────────┬────────────┬──────┐
+│ datetime ┆ population ┆ gdp │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ f64 ┆ i64 │
+╞═════════════════════╪════════════╪══════╡
+│ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │
+│ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │
+│ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │
+└─────────────────────┴────────────┴──────┘
+
Here is a real-world time-series example that uses the by
argument.
>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data_quotes = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 30),
+... datetime(2016, 5, 25, 13, 30, 0, 41),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 49),
+... datetime(2016, 5, 25, 13, 30, 0, 72),
+... datetime(2016, 5, 25, 13, 30, 0, 75),
+... ],
+... "ticker": [
+... "GOOG",
+... "MSFT",
+... "MSFT",
+... "MSFT",
+... "GOOG",
+... "AAPL",
+... "GOOG",
+... "MSFT",
+... ],
+... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
+... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
+... }
+>>> data_trades = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 38),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... ],
+... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+... "quantity": [75, 155, 100, 100, 100],
+... }
+>>> quotes_pd = pd.DataFrame(data_quotes)
+>>> trades_pd = pd.DataFrame(data_trades)
+>>> quotes_pl = pl.LazyFrame(data_quotes).sort("datetime")
+>>> trades_pl = pl.LazyFrame(data_trades).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns:
+>>> def agnostic_join_asof_datetime_by_ticker(
+... df_native: IntoFrameT,
+... other_native: IntoFrameT,
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return df.join_asof(other, on="datetime", by="ticker").to_native()
+
We can now pass either pandas or Polars to the function:
+>>> agnostic_join_asof_datetime_by_ticker(trades_pd, quotes_pd)
+ datetime ticker price quantity bid ask
+0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96
+1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98
+2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93
+3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93
+4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN
+
>>> agnostic_join_asof_datetime_by_ticker(trades_pl, quotes_pl).collect()
+shape: (5, 6)
+┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐
+│ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │
+╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡
+│ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │
+│ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │
+└────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘
+
lazy()
+
+Lazify the DataFrame (if possible).
+If a library does not support lazy execution, then this is a no-op.
+ + +Examples:
+Construct pandas and Polars objects:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
We define a library agnostic function:
+>>> def agnostic_lazy(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.lazy().to_native()
+
Note that the pandas dataframe stays eager, and the Polars LazyFrame stays lazy:
+>>> agnostic_lazy(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> agnostic_lazy(lf_pl)
+<LazyFrame ...>
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.pipe(lambda _df: _df.select("a")).to_native()
+
We can then pass either pandas or Polars:
+>>> agnostic_pipe(df_pd)
+ a
+0 1
+1 2
+2 3
+>>> agnostic_pipe(lf_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
rename(mapping)
+
+Rename column names.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mapping
+ |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name, or a + function that takes the old name as input and returns the + new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> def agnostic_rename(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.rename({"foo": "apple"}).to_native()
+
We can then pass either pandas or Polars to agnostic_rename
:
>>> agnostic_rename(df_pd)
+ apple bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> agnostic_rename(lf_pl).collect()
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
select(*exprs, **named_exprs)
+
+Select columns from this LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. +Accepts expression input. Strings are parsed as column names. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. +The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
If you'd like to select a column whose name isn't a string (for example,
+if you're working with pandas) then you should explicitly use nw.col
instead
+of just passing the column name. For example, to select a column named
+0
use df.select(nw.col(0))
, not df.select(0)
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we pass the name of a +column to select that column.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select("foo").to_native()
+
We can then pass either pandas or Polars to agnostic_select
:
>>> agnostic_select(df_pd)
+ foo
+0 1
+1 2
+2 3
+>>> agnostic_select(df_pl)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> agnostic_select(lf_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(["foo", "bar"]).to_native()
+>>>
+>>> agnostic_select(df_pd)
+ foo bar
+0 1 6
+1 2 7
+2 3 8
+>>> agnostic_select(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+>>> agnostic_select(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo"), nw.col("bar") + 1).to_native()
+>>>
+>>> agnostic_select(df_pd)
+ foo bar
+0 1 7
+1 2 8
+2 3 9
+>>> agnostic_select(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> agnostic_select(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(threshold=nw.col("foo") * 2).to_native()
+>>>
+>>> agnostic_select(df_pd)
+ threshold
+0 2
+1 4
+2 6
+>>> agnostic_select(df_pl)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+>>> agnostic_select(lf_pl).collect()
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False, nulls_last=False)
+
+Sort the LazyFrame by the given columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ by
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column name(s) to sort by. + |
+ + required + | +
+ *more_by
+ |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional arguments. + |
+
+ ()
+ |
+
+ descending
+ |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple columns, can be +specified per column by passing a sequence of booleans. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last. A single boolean is applied to +all columns; per-column control is not supported. + |
+
+ False
+ |
+
Unlike Polars, it is not possible to specify a sequence of booleans for
+nulls_last
in order to control per-column behaviour. Instead a single
+boolean is applied for all by
columns.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we sort by multiple +columns in different orders
+>>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.sort("c", "a", descending=[False, True]).to_native()
+
We can then pass either pandas or Polars to agnostic_sort
:
>>> agnostic_sort(df_pd)
+ a b c
+0 1.0 6.0 a
+2 NaN 4.0 b
+1 2.0 5.0 c
+>>> agnostic_sort(lf_pl).collect()
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
tail(n=5)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 5
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function that gets the last 3 rows.
+>>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.tail(3).to_native()
+
We can then pass either pandas or Polars to agnostic_tail
:
>>> agnostic_tail(df_pd)
+ a b
+3 4 10
+4 5 11
+5 6 12
+>>> agnostic_tail(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 4 ┆ 10 │
+│ 5 ┆ 11 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+>>> agnostic_tail(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 4 ┆ 10 │
+│ 5 ┆ 11 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+
to_native()
+
+Convert Narwhals LazyFrame to native one.
+ + +Returns:
+Type | +Description | +
---|---|
+ FrameT
+ |
+
+
+
+ Object of class that user started with. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>>
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+>>> df_pa = pa.table(data)
+
Calling to_native
on a Narwhals LazyFrame returns the native object:
>>> nw.from_native(df_pd).lazy().to_native()
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> nw.from_native(lf_pl).to_native().collect()
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 7.0 ┆ b │
+│ 3 ┆ 8.0 ┆ c │
+└─────┴─────┴─────┘
+
unique(subset=None, *, keep='any', maintain_order=False)
+
+Drop duplicate rows from this LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows.
+ If set to |
+
+ None
+ |
+
+ keep
+ |
+
+ Literal['any', 'first', 'last', 'none']
+ |
+
+
+
+ {'first', 'last', 'any', 'none'} +Which of the duplicate rows to keep. +
|
+
+ 'any'
+ |
+
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original LazyFrame. This may be more
+expensive to compute. Setting this to |
+
+ False
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ LazyFrame with unique rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.unique(["bar", "ham"]).to_native()
+
We can then pass either pandas or Polars to agnostic_unique
:
>>> agnostic_unique(df_pd)
+ foo bar ham
+0 1 a b
+>>> agnostic_unique(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
unpivot(on=None, *, index=None, variable_name=None, value_name=None)
+
+Unpivot a LazyFrame from wide to long format.
+Optionally leaves identifiers set.
+This function is useful to massage a DataFrame into a format where one or more +columns are identifier variables (index) while all other columns, considered +measured variables (on), are "unpivoted" to the row axis leaving just +two non-identifier columns, 'variable' and 'value'.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as values variables; if |
+
+ None
+ |
+
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as identifier variables. + |
+
+ None
+ |
+
+ variable_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
+ value_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
If you're coming from pandas, this is similar to pandas.DataFrame.melt
,
+but with index
replacing id_vars
and on
replacing value_vars
.
+In other frameworks, you might know this operation as pivot_longer
.
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": ["x", "y", "z"],
+... "b": [1, 3, 5],
+... "c": [2, 4, 6],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> def agnostic_unpivot(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... df.unpivot(on=["b", "c"], index="a").sort(["variable", "a"])
+... ).to_native()
+
>>> agnostic_unpivot(lf_pl).collect()
+shape: (6, 3)
+┌─────┬──────────┬───────┐
+│ a ┆ variable ┆ value │
+│ --- ┆ --- ┆ --- │
+│ str ┆ str ┆ i64 │
+╞═════╪══════════╪═══════╡
+│ x ┆ b ┆ 1 │
+│ y ┆ b ┆ 3 │
+│ z ┆ b ┆ 5 │
+│ x ┆ c ┆ 2 │
+│ y ┆ c ┆ 4 │
+│ z ┆ c ┆ 6 │
+└─────┴──────────┴───────┘
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this LazyFrame.
+Added columns will replace existing columns with the same name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ A new LazyFrame with the columns added. + |
+
Creating a new LazyFrame using this method does not create a new copy of +existing data.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> df = {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we pass an expression +to add it as a new column:
+>>> def agnostic_with_columns(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns((nw.col("a") * 2).alias("2a")).to_native()
+
We can then pass either pandas or Polars to agnostic_with_columns
:
>>> agnostic_with_columns(df_pd)
+ a b c 2a
+0 1 0.5 True 2
+1 2 4.0 True 4
+2 3 10.0 False 6
+3 4 13.0 True 8
+>>> agnostic_with_columns(df_pl)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ 2a │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+>>> agnostic_with_columns(lf_pl).collect()
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ 2a │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
with_row_index(name='index')
+
+Insert column which enumerates rows.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_with_row_index(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_row_index().to_native()
+
We can then pass either pandas or Polars:
+>>> agnostic_with_row_index(df_pd)
+ index a b
+0 0 1 4
+1 1 2 5
+2 2 3 6
+>>> agnostic_with_row_index(lf_pl).collect()
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ index ┆ a ┆ b │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ i64 ┆ i64 │
+╞═══════╪═════╪═════╡
+│ 0 ┆ 1 ┆ 4 │
+│ 1 ┆ 2 ┆ 5 │
+│ 2 ┆ 3 ┆ 6 │
+└───────┴─────┴─────┘
+
Here are the top-level functions available in Narwhals.
+ + +all()
+
+Instantiate an expression representing all columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> df_pa = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all() * 2).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 2 8
+1 4 10
+2 6 12
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 8 │
+│ 4 ┆ 10 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[2,4,6]]
+b: [[8,10,12]]
+
all_horizontal(*exprs)
+
+Compute the bitwise AND horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": [False, False, True, True, False, None],
+... "b": [False, True, True, None, None, None],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow")
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select("a", "b", all=nw.all_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b all
+0 False False False
+1 False True False
+2 True True True
+3 True <NA> <NA>
+4 False <NA> False
+5 <NA> <NA> <NA>
+
>>> my_library_agnostic_function(df_pl)
+shape: (6, 3)
+┌───────┬───────┬───────┐
+│ a ┆ b ┆ all │
+│ --- ┆ --- ┆ --- │
+│ bool ┆ bool ┆ bool │
+╞═══════╪═══════╪═══════╡
+│ false ┆ false ┆ false │
+│ false ┆ true ┆ false │
+│ true ┆ true ┆ true │
+│ true ┆ null ┆ null │
+│ false ┆ null ┆ false │
+│ null ┆ null ┆ null │
+└───────┴───────┴───────┘
+
>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+all: bool
+----
+a: [[false,false,true,true,false,null]]
+b: [[false,true,true,null,null,null]]
+all: [[false,false,true,null,false,null]]
+
any_horizontal(*exprs)
+
+Compute the bitwise OR horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": [False, False, True, True, False, None],
+... "b": [False, True, True, None, None, None],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow")
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select("a", "b", any=nw.any_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b any
+0 False False False
+1 False True True
+2 True True True
+3 True <NA> True
+4 False <NA> <NA>
+5 <NA> <NA> <NA>
+
>>> my_library_agnostic_function(df_pl)
+shape: (6, 3)
+┌───────┬───────┬───────┐
+│ a ┆ b ┆ any │
+│ --- ┆ --- ┆ --- │
+│ bool ┆ bool ┆ bool │
+╞═══════╪═══════╪═══════╡
+│ false ┆ false ┆ false │
+│ false ┆ true ┆ true │
+│ true ┆ true ┆ true │
+│ true ┆ null ┆ true │
+│ false ┆ null ┆ null │
+│ null ┆ null ┆ null │
+└───────┴───────┴───────┘
+
>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+any: bool
+----
+a: [[false,false,true,true,false,null]]
+b: [[false,true,true,null,null,null]]
+any: [[false,true,true,true,null,null]]
+
col(*names)
+
+Creates an expression that references one or more columns by their name(s).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ names
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Name(s) of the columns to reference. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [3, 4]})
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a") * nw.col("b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 3
+1 8
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 3 │
+│ 8 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[3,8]]
+
concat(items, *, how='vertical')
+
+Concatenate multiple DataFrames or LazyFrames into a single entity.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ items
+ |
+
+ Iterable[FrameT]
+ |
+
+
+
+ DataFrames, LazyFrames to concatenate. + |
+ + required + | +
+ how
+ |
+
+ Literal['horizontal', 'vertical', 'diagonal']
+ |
+
+
+
+ concatenating strategy: +
|
+
+ 'vertical'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ FrameT
+ |
+
+
+
+ A new DataFrame or LazyFrame resulting from the concatenation. + |
+
Raises:
+Type | +Description | +
---|---|
+ TypeError
+ |
+
+
+
+ The items to concatenate should either all be eager, or all lazy + |
+
Examples:
+Let's take an example of vertical concatenation:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> data_2 = {"a": [5, 2], "b": [1, 4]}
+
>>> df_pd_1 = pd.DataFrame(data_1)
+>>> df_pd_2 = pd.DataFrame(data_2)
+>>> df_pl_1 = pl.DataFrame(data_1)
+>>> df_pl_2 = pl.DataFrame(data_2)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def agnostic_vertical_concat(df1, df2):
+... return nw.concat([df1, df2], how="vertical")
+
>>> agnostic_vertical_concat(df_pd_1, df_pd_2)
+ a b
+0 1 4
+1 2 5
+2 3 6
+0 5 1
+1 2 4
+>>> agnostic_vertical_concat(df_pl_1, df_pl_2)
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+│ 5 ┆ 1 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
Let's look at a case for horizontal concatenation:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> data_2 = {"c": [5, 2], "d": [1, 4]}
+
>>> df_pd_1 = pd.DataFrame(data_1)
+>>> df_pd_2 = pd.DataFrame(data_2)
+>>> df_pl_1 = pl.DataFrame(data_1)
+>>> df_pl_2 = pl.DataFrame(data_2)
+
Defining a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def agnostic_horizontal_concat(df1, df2):
+... return nw.concat([df1, df2], how="horizontal")
+
>>> agnostic_horizontal_concat(df_pd_1, df_pd_2)
+ a b c d
+0 1 4 5.0 1.0
+1 2 5 2.0 4.0
+2 3 6 NaN NaN
+
>>> agnostic_horizontal_concat(df_pl_1, df_pl_2)
+shape: (3, 4)
+┌─────┬─────┬──────┬──────┐
+│ a ┆ b ┆ c ┆ d │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪══════╪══════╡
+│ 1 ┆ 4 ┆ 5 ┆ 1 │
+│ 2 ┆ 5 ┆ 2 ┆ 4 │
+│ 3 ┆ 6 ┆ null ┆ null │
+└─────┴─────┴──────┴──────┘
+
Let's look at a case for diagonal concatenation:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data_1 = {"a": [1, 2], "b": [3.5, 4.5]}
+>>> data_2 = {"a": [3, 4], "z": ["x", "y"]}
+
>>> df_pd_1 = pd.DataFrame(data_1)
+>>> df_pd_2 = pd.DataFrame(data_2)
+>>> df_pl_1 = pl.DataFrame(data_1)
+>>> df_pl_2 = pl.DataFrame(data_2)
+
Defining a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def agnostic_diagonal_concat(df1, df2):
+... return nw.concat([df1, df2], how="diagonal")
+
>>> agnostic_diagonal_concat(df_pd_1, df_pd_2)
+ a b z
+0 1 3.5 NaN
+1 2 4.5 NaN
+0 3 NaN x
+1 4 NaN y
+
>>> agnostic_diagonal_concat(df_pl_1, df_pl_2)
+shape: (4, 3)
+┌─────┬──────┬──────┐
+│ a ┆ b ┆ z │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞═════╪══════╪══════╡
+│ 1 ┆ 3.5 ┆ null │
+│ 2 ┆ 4.5 ┆ null │
+│ 3 ┆ null ┆ x │
+│ 4 ┆ null ┆ y │
+└─────┴──────┴──────┘
+
concat_str(exprs, *more_exprs, separator='', ignore_nulls=False)
+
+Horizontally concatenate columns into a single string column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Columns to concatenate into a single string column. Accepts expression
+input. Strings are parsed as column names, other non-expression inputs are
+parsed as literals. Non- |
+ + required + | +
+ *more_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to concatenate into a single string column, +specified as positional arguments. + |
+
+ ()
+ |
+
+ separator
+ |
+
+ str
+ |
+
+
+
+ String that will be used to separate the values of each column. + |
+
+ ''
+ |
+
+ ignore_nulls
+ |
+
+ bool
+ |
+
+
+
+ Ignore null values (default is |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [1, 2, 3],
+... "b": ["dogs", "cats", None],
+... "c": ["play", "swim", "walk"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal string +concatenation of different columns
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.concat_str(
+... [
+... nw.col("a") * 2,
+... nw.col("b"),
+... nw.col("c"),
+... ],
+... separator=" ",
+... ).alias("full_sentence")
+... ).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(pd.DataFrame(data))
+ full_sentence
+0 2 dogs play
+1 4 cats swim
+2 None
+
>>> my_library_agnostic_function(pl.DataFrame(data))
+shape: (3, 1)
+┌───────────────┐
+│ full_sentence │
+│ --- │
+│ str │
+╞═══════════════╡
+│ 2 dogs play │
+│ 4 cats swim │
+│ null │
+└───────────────┘
+
>>> my_library_agnostic_function(pa.table(data))
+pyarrow.Table
+full_sentence: string
+----
+full_sentence: [["2 dogs play","4 cats swim",null]]
+
from_arrow(native_frame, *, native_namespace)
+
+Construct a DataFrame from an object which supports the PyCapsule Interface.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_frame
+ |
+
+ ArrowStreamExportable
+ |
+
+
+
+ Object which implements |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's define a dataframe-agnostic function which creates a PyArrow +Table.
+>>> def agnostic_to_arrow(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return nw.from_arrow(df, native_namespace=pa).to_native()
+
Let's see what happens when passing pandas / Polars input:
+>>> agnostic_to_arrow(pd.DataFrame(data))
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+>>> agnostic_to_arrow(pl.DataFrame(data))
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+
from_dict(data, schema=None, *, native_namespace=None)
+
+Instantiate DataFrame from dictionary.
+Indexes (if present, for pandas-like backends) are aligned following +the left-hand-rule.
+ + +For pandas-like dataframes, conversion to schema is applied after dataframe +creation.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ data
+ |
+
+ dict[str, Any]
+ |
+
+
+
+ Dictionary to create DataFrame from. + |
+ + required + | +
+ schema
+ |
+
+ dict[str, DType] | Schema | None
+ |
+
+
+
+ The DataFrame schema as Schema or dict of {name: type}. + |
+
+ None
+ |
+
+ native_namespace
+ |
+
+ ModuleType | None
+ |
+
+
+
+ The native library to use for DataFrame creation. Only +necessary if inputs are not Narwhals Series. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data:
+>>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT:
+... new_data = {"c": [5, 2], "d": [1, 4]}
+... native_namespace = nw.get_native_namespace(df_native)
+... return nw.from_dict(new_data, native_namespace=native_namespace).to_native()
+
Let's see what happens when passing pandas, Polars or PyArrow input:
+>>> agnostic_from_dict(pd.DataFrame(data))
+ c d
+0 5 1
+1 2 4
+>>> agnostic_from_dict(pl.DataFrame(data))
+shape: (2, 2)
+┌─────┬─────┐
+│ c ┆ d │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 5 ┆ 1 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+>>> agnostic_from_dict(pa.table(data))
+pyarrow.Table
+c: int64
+d: int64
+----
+c: [[5,2]]
+d: [[1,4]]
+
from_native(native_object, *, strict=None, pass_through=None, eager_only=False, eager_or_interchange_only=False, series_only=False, allow_series=None)
+
+Convert native_object
to Narwhals DataFrame, LazyFrame, or Series.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_object
+ |
+
+ IntoFrameT | IntoSeries | T
+ |
+
+
+
+ Raw object from user. +Depending on the other arguments, input object can be: +
|
+ + required + | +
+ strict
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if the object can't be converted to Narwhals: +
Deprecated (v1.13.0):
+ Please use |
+
+ None
+ |
+
+ pass_through
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if the object can't be converted to Narwhals: +
|
+
+ None
+ |
+
+ eager_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow eager objects: +
|
+
+ False
+ |
+
+ eager_or_interchange_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow eager objects or objects which +have interchange-level support in Narwhals: +
See interchange-only support +for more details. + |
+
+ False
+ |
+
+ series_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow Series: +
|
+
+ False
+ |
+
+ allow_series
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to allow Series (default is only DataFrame / LazyFrame): +
|
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T
+ |
+
+
+
+ DataFrame, LazyFrame, Series, or original object, depending +on which combination of parameters was passed. + |
+
from_numpy(data, schema=None, *, native_namespace)
+
+Construct a DataFrame from a NumPy ndarray.
+ + +Only row orientation is currently supported.
+For pandas-like dataframes, conversion to schema is applied after dataframe +creation.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ data
+ |
+
+ ndarray
+ |
+
+
+
+ Two-dimensional data represented as a NumPy ndarray. + |
+ + required + | +
+ schema
+ |
+
+ dict[str, DType] | Schema | list[str] | None
+ |
+
+
+
+ The DataFrame schema as Schema, dict of {name: type}, or a list of str. + |
+
+ None
+ |
+
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> import numpy as np
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2], "b": [3, 4]}
+
Let's create a new dataframe of the same class as the dataframe we started with, from a NumPy ndarray of new data:
+>>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT:
+... new_data = np.array([[5, 2, 1], [1, 4, 3]])
+... df = nw.from_native(df_native)
+... native_namespace = nw.get_native_namespace(df)
+... return nw.from_numpy(new_data, native_namespace=native_namespace).to_native()
+
Let's see what happens when passing pandas, Polars or PyArrow input:
+>>> agnostic_from_numpy(pd.DataFrame(data))
+ column_0 column_1 column_2
+0 5 2 1
+1 1 4 3
+>>> agnostic_from_numpy(pl.DataFrame(data))
+shape: (2, 3)
+┌──────────┬──────────┬──────────┐
+│ column_0 ┆ column_1 ┆ column_2 │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞══════════╪══════════╪══════════╡
+│ 5 ┆ 2 ┆ 1 │
+│ 1 ┆ 4 ┆ 3 │
+└──────────┴──────────┴──────────┘
+>>> agnostic_from_numpy(pa.table(data))
+pyarrow.Table
+column_0: int64
+column_1: int64
+column_2: int64
+----
+column_0: [[5,1]]
+column_1: [[2,4]]
+column_2: [[1,3]]
+
Let's specify the column names:
+>>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT:
+... new_data = np.array([[5, 2, 1], [1, 4, 3]])
+... schema = ["c", "d", "e"]
+... df = nw.from_native(df_native)
+... native_namespace = nw.get_native_namespace(df)
+... return nw.from_numpy(
+... new_data, native_namespace=native_namespace, schema=schema
+... ).to_native()
+
Let's see the modified outputs:
+>>> agnostic_from_numpy(pd.DataFrame(data))
+ c d e
+0 5 2 1
+1 1 4 3
+>>> agnostic_from_numpy(pl.DataFrame(data))
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ c ┆ d ┆ e │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ 5 ┆ 2 ┆ 1 │
+│ 1 ┆ 4 ┆ 3 │
+└─────┴─────┴─────┘
+>>> agnostic_from_numpy(pa.table(data))
+pyarrow.Table
+c: int64
+d: int64
+e: int64
+----
+c: [[5,1]]
+d: [[2,4]]
+e: [[1,3]]
+
Let's modify the function so that it specifies the schema:
+>>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT:
+... new_data = np.array([[5, 2, 1], [1, 4, 3]])
+... schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()}
+... df = nw.from_native(df_native)
+... native_namespace = nw.get_native_namespace(df)
+... return nw.from_numpy(
+... new_data, native_namespace=native_namespace, schema=schema
+... ).to_native()
+
Let's see the outputs:
+>>> agnostic_from_numpy(pd.DataFrame(data))
+ c d e
+0 5 2.0 1
+1 1 4.0 3
+>>> agnostic_from_numpy(pl.DataFrame(data))
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ c ┆ d ┆ e │
+│ --- ┆ --- ┆ --- │
+│ i16 ┆ f32 ┆ i8 │
+╞═════╪═════╪═════╡
+│ 5 ┆ 2.0 ┆ 1 │
+│ 1 ┆ 4.0 ┆ 3 │
+└─────┴─────┴─────┘
+>>> agnostic_from_numpy(pa.table(data))
+pyarrow.Table
+c: int16
+d: float
+e: int8
+----
+c: [[5,1]]
+d: [[2,4]]
+e: [[1,3]]
+
generate_temporary_column_name(n_bytes, columns)
+
+Generates a unique column name that is not present in the given list of columns.
+It relies on Python's secrets.token_hex function to return a random hexadecimal string generated from n_bytes random bytes.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n_bytes
+ |
+
+ int
+ |
+
+
+
+ The number of bytes to generate for the token. + |
+ + required + | +
+ columns
+ |
+
+ list[str]
+ |
+
+
+
+ The list of columns to check for uniqueness. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ str
+ |
+
+
+
+ A unique token that is not present in the given list of columns. + |
+
Raises:
+Type | +Description | +
---|---|
+ AssertionError
+ |
+
+
+
+ If a unique token cannot be generated after 100 attempts. + |
+
Examples:
+>>> import narwhals as nw
+>>> columns = ["abc", "xyz"]
+>>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns
+True
+
get_level(obj)
+
+Level of support Narwhals has for current object.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ DataFrame[Any] | LazyFrame[Any] | Series[IntoSeriesT]
+ |
+
+
+
+ Dataframe or Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Literal['full', 'lazy', 'interchange']
+ |
+
+
+
+ This can be one of: +
|
+
get_native_namespace(obj)
+
+Get native namespace from object.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ DataFrame[Any] | LazyFrame[Any] | Series[Any] | DataFrame | Series | DataFrame | LazyFrame | Series | Table | ChunkedArray
+ |
+
+
+
+ Dataframe, Lazyframe, or Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ Native module. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
+>>> nw.get_native_namespace(df)
+<module 'pandas'...>
+>>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]}))
+>>> nw.get_native_namespace(df)
+<module 'polars'...>
+
is_ordered_categorical(series)
+
+Return whether indices of categories are semantically meaningful.
+This is a convenience function for accessing what would otherwise be
+the is_ordered
property from the DataFrame Interchange Protocol,
+see https://data-apis.org/dataframe-protocol/latest/API.html.
dtype.ordering == "physical"
.dtype.cat.ordered == True
.dtype.type.ordered == True
.Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ series
+ |
+
+ Series[Any]
+ |
+
+
+
+ Input Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+ Whether the Series is an ordered categorical. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = ["x", "y"]
+>>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True))
+>>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical"))
+
Let's define a library-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return nw.is_ordered_categorical(s)
+
Then, we can pass any supported library to func
:
>>> func(s_pd)
+True
+>>> func(s_pl)
+True
+
len()
+
+Return the number of rows.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.len()).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ len
+0 2
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 1)
+┌─────┐
+│ len │
+│ --- │
+│ u32 │
+╞═════╡
+│ 2 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+len: int64
+----
+len: [[2]]
+
lit(value, dtype=None)
+
+Return an expression representing a literal value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any
+ |
+
+
+
+ The value to use as literal. + |
+ + required + | +
+ dtype
+ |
+
+ DType | None
+ |
+
+
+
+ The data type of the literal value. If not provided, the data type will be inferred. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pl = pl.DataFrame({"a": [1, 2]})
+>>> df_pd = pd.DataFrame({"a": [1, 2]})
+>>> df_pa = pa.table({"a": [1, 2]})
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(nw.lit(3)).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a literal
+0 1 3
+1 2 3
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 2)
+┌─────┬─────────┐
+│ a ┆ literal │
+│ --- ┆ --- │
+│ i64 ┆ i32 │
+╞═════╪═════════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 3 │
+└─────┴─────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+literal: int64
+----
+a: [[1,2]]
+literal: [[3,3]]
+
max(*columns)
+
+Return the maximum value.
+ + +Syntactic sugar for nw.col(columns).max()
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.max("a")).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 2
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2]]
+
max_horizontal(*exprs)
+
+Get the maximum value horizontally across columns.
+ + +We support max_horizontal
over numeric columns only.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal max of "a" +and "b" columns:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.max_horizontal("a", "b")).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(pd.DataFrame(data))
+ a
+0 4.0
+1 8.0
+2 3.0
+>>> my_library_agnostic_function(pl.DataFrame(data))
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 4 │
+│ 8 │
+│ 3 │
+└─────┘
+>>> my_library_agnostic_function(pa.table(data))
+pyarrow.Table
+a: int64
+----
+a: [[4,8,3]]
+
maybe_align_index(lhs, rhs)
+
+Align lhs
to the Index of rhs
, if they're both pandas-like.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lhs
+ |
+
+ FrameOrSeriesT
+ |
+
+
+
+ Dataframe or Series. + |
+ + required + | +
+ rhs
+ |
+
+ Series[Any] | DataFrame[Any] | LazyFrame[Any]
+ |
+
+
+
+ Dataframe or Series to align with. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ FrameOrSeriesT
+ |
+
+
+
+ Same type as input. + |
+
This is only really intended for backwards-compatibility purposes,
+for example if your library already aligns indices for users.
+If you're designing a new library, we highly encourage you to not
+rely on the Index.
+For non-pandas-like inputs, this only checks that lhs
and rhs
+are the same length.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4])
+>>> s_pd = pd.Series([6, 7], index=[4, 3])
+>>> df = nw.from_native(df_pd)
+>>> s = nw.from_native(s_pd, series_only=True)
+>>> nw.to_native(nw.maybe_align_index(df, s))
+ a
+4 2
+3 1
+
maybe_convert_dtypes(obj, *args, **kwargs)
+
+Convert columns or series to the best possible dtypes using dtypes supporting pd.NA
, if df is pandas-like.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ FrameOrSeriesT
+ |
+
+
+
+ DataFrame or Series. + |
+ + required + | +
+ *args
+ |
+
+ bool
+ |
+
+
+
+ Additional arguments which get passed through. + |
+
+ ()
+ |
+
+ **kwargs
+ |
+
+ bool | str
+ |
+
+
+
+ Additional arguments which get passed through. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ FrameOrSeriesT
+ |
+
+
+
+ Same type as input. + |
+
For non-pandas-like inputs, this is a no-op.
+Also, args
and kwargs
just get passed down to the underlying library as-is.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> import numpy as np
+>>> df_pd = pd.DataFrame(
+... {
+... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
+... "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
+... }
+... )
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes
+a Int32
+b boolean
+dtype: object
+
maybe_get_index(obj)
+
+Get the index of a DataFrame or a Series, if it's pandas-like.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ DataFrame[Any] | LazyFrame[Any] | Series[Any]
+ |
+
+
+
+ Dataframe or Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any | None
+ |
+
+
+
+ The index of the underlying object if it is pandas-like, otherwise None. + |
+
This is only really intended for backwards-compatibility purposes,
+for example if your library already aligns indices for users.
+If you're designing a new library, we highly encourage you to not
+rely on the Index.
+For non-pandas-like inputs, this returns None
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df = nw.from_native(df_pd)
+>>> nw.maybe_get_index(df)
+RangeIndex(start=0, stop=2, step=1)
+>>> series_pd = pd.Series([1, 2])
+>>> series = nw.from_native(series_pd, series_only=True)
+>>> nw.maybe_get_index(series)
+RangeIndex(start=0, stop=2, step=1)
+
maybe_reset_index(obj)
+
+Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ FrameOrSeriesT
+ |
+
+
+
+ Dataframe or Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ FrameOrSeriesT
+ |
+
+
+
+ Same type as input. + |
+
This is only really intended for backwards-compatibility purposes, +for example if your library already resets the index for users. +If you're designing a new library, we highly encourage you to not +rely on the Index. +For non-pandas-like inputs, this is a no-op.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=([6, 7]))
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_reset_index(df))
+ a b
+0 1 4
+1 2 5
+>>> series_pd = pd.Series([1, 2])
+>>> series = nw.from_native(series_pd, series_only=True)
+>>> nw.maybe_get_index(series)
+RangeIndex(start=0, stop=2, step=1)
+
maybe_set_index(obj, column_names=None, *, index=None)
+
+Set the index of a DataFrame or a Series, if it's pandas-like.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ FrameOrSeriesT
+ |
+
+
+
+ object for which maybe set the index (can be either a Narwhals |
+ + required + | +
+ column_names
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ name or list of names of the columns to set as index.
+For dataframes, only one of |
+
+ None
+ |
+
+ index
+ |
+
+ Series[IntoSeriesT] | list[Series[IntoSeriesT]] | None
+ |
+
+
+
+ series or list of series to set as index. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ FrameOrSeriesT
+ |
+
+
+
+ Same type as input. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If one of the following conditions happens: +
|
+
This is only really intended for backwards-compatibility purposes, for example if +your library already aligns indices for users. +If you're designing a new library, we highly encourage you to not +rely on the Index.
+For non-pandas-like inputs, this is a no-op.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_set_index(df, "b"))
+ a
+b
+4 1
+5 2
+
mean(*columns)
+
+Get the mean value.
+ + +Syntactic sugar for nw.col(columns).mean()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pl = pl.DataFrame({"a": [1, 8, 3]})
+>>> df_pd = pd.DataFrame({"a": [1, 8, 3]})
+>>> df_pa = pa.table({"a": [1, 8, 3]})
+
We define a dataframe agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.mean("a")).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 4.0
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 4.0 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[4]]
+
mean_horizontal(*exprs)
+
+Compute the mean of all values horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function that computes the horizontal mean of "a" +and "b" columns:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.mean_horizontal("a", "b")).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 2.5
+1 6.5
+2 3.0
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 2.5 │
+│ 6.5 │
+│ 3.0 │
+└─────┘
+
>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[2.5,6.5,3]]
+
median(*columns)
+
+Get the median value.
+ + +Syntactic sugar for nw.col(columns).median()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [4, 5, 2]})
+>>> df_pl = pl.DataFrame({"a": [4, 5, 2]})
+>>> df_pa = pa.table({"a": [4, 5, 2]})
+
Let's define a dataframe agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.median("a")).to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 4.0
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 4.0 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[4]]
+
min(*columns)
+
+Return the minimum value.
+ + +Syntactic sugar for nw.col(columns).min()
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.min("b")).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ b
+0 5
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 1)
+┌─────┐
+│ b │
+│ --- │
+│ i64 │
+╞═════╡
+│ 5 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+b: int64
+----
+b: [[5]]
+
min_horizontal(*exprs)
+
+Get the minimum value horizontally across columns.
+ + +We support min_horizontal
over numeric columns only.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal min of "a" +and "b" columns:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.min_horizontal("a", "b")).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(pd.DataFrame(data))
+ a
+0 1.0
+1 5.0
+2 3.0
+>>> my_library_agnostic_function(pl.DataFrame(data))
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 5 │
+│ 3 │
+└─────┘
+>>> my_library_agnostic_function(pa.table(data))
+pyarrow.Table
+a: int64
+----
+a: [[1,5,3]]
+
narwhalify(func=None, *, strict=None, pass_through=None, eager_only=False, eager_or_interchange_only=False, series_only=False, allow_series=True)
+
+Decorate function so it becomes dataframe-agnostic.
+This will try to convert any dataframe/series-like object into the Narwhals
+respective DataFrame/Series, while leaving the other parameters as they are.
+Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be
+converted back to the original dataframe/series type, while if the output is another
+type it will be left as is.
+By setting pass_through=False
, then every input and every output will be required to be a
+dataframe/series-like object.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ func
+ |
+
+ Callable[..., Any] | None
+ |
+
+
+
+ Function to wrap in a |
+
+ None
+ |
+
+ strict
+ |
+
+ bool | None
+ |
+
+
+
+ Deprecated (v1.13.0):
+Please use Determine what happens if the object can't be converted to Narwhals: +
|
+
+ None
+ |
+
+ pass_through
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if the object can't be converted to Narwhals: +
|
+
+ None
+ |
+
+ eager_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow eager objects: +
|
+
+ False
+ |
+
+ eager_or_interchange_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow eager objects or objects which +have interchange-level support in Narwhals: +
See interchange-only support +for more details. + |
+
+ False
+ |
+
+ series_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow Series: +
|
+
+ False
+ |
+
+ allow_series
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to allow Series (default is only Dataframe / Lazyframe): +
|
+
+ True
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Callable[..., Any]
+ |
+
+
+
+ Decorated function. + |
+
Examples:
+Instead of writing
+>>> import narwhals as nw
+>>> def agnostic_group_by_sum(df):
+... df = nw.from_native(df, pass_through=True)
+... df = df.group_by("a").agg(nw.col("b").sum())
+... return nw.to_native(df)
+
you can just write
+>>> @nw.narwhalify
+... def agnostic_group_by_sum(df):
+... return df.group_by("a").agg(nw.col("b").sum())
+
new_series(name, values, dtype=None, *, native_namespace)
+
+Instantiate Narwhals Series from iterable (e.g. list or array).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ Name of resulting Series. + |
+ + required + | +
+ values
+ |
+
+ Any
+ |
+
+
+
+ Values to make Series from. + |
+ + required + | +
+ dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ (Narwhals) dtype. If not provided, the native library
+may auto-infer it from |
+
+ None
+ |
+
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for Series creation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Series[Any]
+ |
+
+
+
+ A new Series + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT, IntoSeriesT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_new_series(df_native: IntoFrameT) -> IntoSeriesT:
+... values = [4, 1, 2, 3]
+... native_namespace = nw.get_native_namespace(df_native)
+... return nw.new_series(
+... name="a",
+... values=values,
+... dtype=nw.Int32,
+... native_namespace=native_namespace,
+... ).to_native()
+
We can then pass any supported eager library, such as pandas / Polars / PyArrow:
+>>> agnostic_new_series(pd.DataFrame(data))
+0 4
+1 1
+2 2
+3 3
+Name: a, dtype: int32
+>>> agnostic_new_series(pl.DataFrame(data))
+shape: (4,)
+Series: 'a' [i32]
+[
+ 4
+ 1
+ 2
+ 3
+]
+>>> agnostic_new_series(pa.table(data))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 4,
+ 1,
+ 2,
+ 3
+ ]
+]
+
nth(*indices)
+
+Creates an expression that references one or more columns by their index(es).
+ + +nth
is not supported for Polars version<1.0.0. Please use col
instead.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ indices
+ |
+
+ int | Sequence[int]
+ |
+
+
+
+ One or more indices representing the columns to retrieve. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.nth(0) * 2).to_native()
+
We can pass any supported library such as pandas, Polars, or PyArrow to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 2
+1 4
+>>> my_library_agnostic_function(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 4 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,4]]
+
read_csv(source, *, native_namespace, **kwargs)
+
+Read a CSV file into a DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ source
+ |
+
+ str
+ |
+
+
+
+ Path to a file. + |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Extra keyword arguments which are passed to the native CSV reader.
+For example, you could use
+ |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from types import ModuleType
+
Let's create an agnostic function that reads a csv file with a specified native namespace:
+>>> def agnostic_read_csv(native_namespace: ModuleType) -> IntoDataFrame:
+... return nw.read_csv("file.csv", native_namespace=native_namespace).to_native()
+
Then we can read the file by passing pandas, Polars or PyArrow namespaces:
+>>> agnostic_read_csv(native_namespace=pd)
+ a b
+0 1 4
+1 2 5
+2 3 6
+>>> agnostic_read_csv(native_namespace=pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+>>> agnostic_read_csv(native_namespace=pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+
read_parquet(source, *, native_namespace, **kwargs)
+
+Read into a DataFrame from a parquet file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ source
+ |
+
+ str
+ |
+
+
+
+ Path to a file. + |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Extra keyword arguments which are passed to the native parquet reader.
+For example, you could use
+ |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from types import ModuleType
+
Let's create an agnostic function that reads a parquet file with a specified native namespace:
+>>> def agnostic_read_parquet(native_namespace: ModuleType) -> IntoDataFrame:
+... return nw.read_parquet(
+... "file.parquet", native_namespace=native_namespace
+... ).to_native()
+
Then we can read the file by passing pandas, Polars or PyArrow namespaces:
+>>> agnostic_read_parquet(native_namespace=pd)
+ a b
+0 1 4
+1 2 5
+2 3 6
+>>> agnostic_read_parquet(native_namespace=pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+>>> agnostic_read_parquet(native_namespace=pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+
scan_csv(source, *, native_namespace, **kwargs)
+
+Lazily read from a CSV file.
+For the libraries that do not support lazy dataframes, the function reads +a csv file eagerly and then converts the resulting dataframe to a lazyframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ source
+ |
+
+ str
+ |
+
+
+
+ Path to a file. + |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Extra keyword arguments which are passed to the native CSV reader.
+For example, you could use
+ |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyFrame[Any]
+ |
+
+
+
+ LazyFrame. + |
+
Examples:
+>>> import dask.dataframe as dd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> from types import ModuleType
+
Let's create an agnostic function that lazily reads a csv file with a specified native namespace:
+>>> def agnostic_scan_csv(native_namespace: ModuleType) -> IntoFrame:
+... return nw.scan_csv("file.csv", native_namespace=native_namespace).to_native()
+
Then we can read the file by passing, for example, Polars or Dask namespaces:
+>>> agnostic_scan_csv(native_namespace=pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+>>> agnostic_scan_csv(native_namespace=dd).compute()
+ a b
+0 1 4
+1 2 5
+2 3 6
+
scan_parquet(source, *, native_namespace, **kwargs)
+
+Lazily read from a parquet file.
+For the libraries that do not support lazy dataframes, the function reads +a parquet file eagerly and then converts the resulting dataframe to a lazyframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ source
+ |
+
+ str
+ |
+
+
+
+ Path to a file. + |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Extra keyword arguments which are passed to the native parquet reader.
+For example, you could use
+ |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyFrame[Any]
+ |
+
+
+
+ LazyFrame. + |
+
Examples:
+>>> import dask.dataframe as dd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> from types import ModuleType
+
Let's create an agnostic function that lazily reads a parquet file with a specified native namespace:
+>>> def agnostic_scan_parquet(native_namespace: ModuleType) -> IntoFrame:
+... return nw.scan_parquet(
+... "file.parquet", native_namespace=native_namespace
+... ).to_native()
+
Then we can read the file by passing, for example, Polars or Dask namespaces:
+>>> agnostic_scan_parquet(native_namespace=pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+>>> agnostic_scan_parquet(native_namespace=dd).compute()
+ a b
+0 1 4
+1 2 5
+2 3 6
+
sum(*columns)
+
+Sum all values.
+ + +Syntactic sugar for nw.col(columns).sum()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pl = pl.DataFrame({"a": [1, 2]})
+>>> df_pd = pd.DataFrame({"a": [1, 2]})
+>>> df_pa = pa.table({"a": [1, 2]})
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.sum("a")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 3
+>>> my_library_agnostic_function(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 3 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[3]]
+
sum_horizontal(*exprs)
+
+Sum all values horizontally across columns.
+ + +Unlike Polars, we support horizontal sum over numeric columns only.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "b": [5, 10, None]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.sum_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a
+0 6.0
+1 12.0
+2 3.0
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 6 │
+│ 12 │
+│ 3 │
+└─────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[6,12,3]]
+
show_versions()
+
+Print useful debugging information.
+ + +Examples:
+>>> from narwhals import show_versions
+>>> show_versions()
+
to_native(narwhals_object, *, strict=None, pass_through=None)
+
+Convert Narwhals object to native one.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ narwhals_object
+ |
+
+ DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]
+ |
+
+
+
+ Narwhals object. + |
+ + required + | +
+ strict
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if
Deprecated (v1.13.0):
+ Please use |
+
+ None
+ |
+
+ pass_through
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if
|
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ IntoFrameT | Any
+ |
+
+
+
+ Object of class that user started with. + |
+
to_py_scalar(scalar_like)
+
+If a scalar is not Python native, converts it to Python native.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ scalar_like
+ |
+
+ Any
+ |
+
+
+
+ Scalar-like value. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ Python scalar. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the object is not convertible to a scalar. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
+>>> nw.to_py_scalar(df["a"].item(0))
+1
+>>> import pyarrow as pa
+>>> df = nw.from_native(pa.table({"a": [1, 2, 3]}))
+>>> nw.to_py_scalar(df["a"].item(0))
+1
+>>> nw.to_py_scalar(1)
+1
+
when(*predicates)
+
+Start a when-then-otherwise
expression.
Expression similar to an if-else
statement in Python. Always initiated by a
+nw.when(<condition>).then(<value if condition>)
, and optionally followed by
+chaining one or more .when(<condition>).then(<value>)
statements.
+Chained when-then operations should be read as Python if, elif, ... elif
+blocks, not as if, if, ... if
, i.e. the first condition that evaluates to
+True
will be picked.
+If none of the conditions are True
, an optional
+.otherwise(<value if all statements are false>)
can be appended at the end.
+If not appended, and none of the conditions are True
, None
will be returned.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Condition(s) that must be met in order to apply the subsequent statement.
+Accepts one or more boolean expressions, which are implicitly combined with |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ When
+ |
+
+
+
+ A "when" object, which |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]})
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]})
+>>> df_pa = pa.table({"a": [1, 2, 3], "b": [5, 10, 15]})
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when")
+... ).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd)
+ a b a_when
+0 1 5 5
+1 2 10 5
+2 3 15 6
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 3)
+┌─────┬─────┬────────┐
+│ a ┆ b ┆ a_when │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i32 │
+╞═════╪═════╪════════╡
+│ 1 ┆ 5 ┆ 5 │
+│ 2 ┆ 10 ┆ 5 │
+│ 3 ┆ 15 ┆ 6 │
+└─────┴─────┴────────┘
+>>> my_library_agnostic_function(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+a_when: int64
+----
+a: [[1,2,3]]
+b: [[5,10,15]]
+a_when: [[5,5,6]]
+
narwhals.Schema
Ordered mapping of column names to their data type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ schema
+ |
+
+ Mapping[str, DType] | Iterable[tuple[str, DType]] | None
+ |
+
+
+
+ Mapping[str, DType] | Iterable[tuple[str, DType]] | None +The schema definition given by column names and their associated +instantiated Narwhals data type. Accepts a mapping or an iterable of tuples. + |
+
+ None
+ |
+
Examples:
+Define a schema by passing instantiated data types.
+>>> import narwhals as nw
+>>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()})
+>>> schema
+Schema({'foo': Int8, 'bar': String})
+
Access the data type associated with a specific column name.
+>>> schema["foo"]
+Int8
+
Access various schema properties using the names
, dtypes
, and len
methods.
>>> schema.names()
+['foo', 'bar']
+>>> schema.dtypes()
+[Int8, String]
+>>> schema.len()
+2
+
names()
+
+Get the column names of the schema.
+ + +Returns:
+Type | +Description | +
---|---|
+ list[str]
+ |
+
+
+
+ Column names. + |
+
dtypes()
+
+Get the data types of the schema.
+ + +Returns:
+Type | +Description | +
---|---|
+ list[DType]
+ |
+
+
+
+ Data types of schema. + |
+
len()
+
+Get the number of columns in the schema.
+ + +Returns:
+Type | +Description | +
---|---|
+ int
+ |
+
+
+
+ Number of columns. + |
+
narwhals.selectors
The following selectors are all supported. In addition, just like in Polars, the following +set operations are supported:
+&
|
-
~
boolean()
+
+Select boolean columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select boolean +dtypes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.boolean())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ c
+0 False
+1 True
+>>> func(df_pl)
+shape: (2, 1)
+┌───────┐
+│ c │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ true │
+└───────┘
+
by_dtype(*dtypes)
+
+Select columns based on their dtype.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtypes
+ |
+
+ Any
+ |
+
+
+
+ one or more data types to select + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that selects int64 and float64 +dtypes and multiplies each value by 2:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.by_dtype(nw.Int64, nw.Float64) * 2)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ a c
+0 2 8.2
+1 4 4.6
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ c │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 8.2 │
+│ 4 ┆ 4.6 │
+└─────┴─────┘
+
categorical()
+
+Select categorical columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data).astype({"b": "category"})
+>>> df_pl = pl.DataFrame(data, schema_overrides={"b": pl.Categorical})
+
Let's define a dataframe-agnostic function to select categorical +dtypes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.categorical())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ b
+0 x
+1 y
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ b │
+│ --- │
+│ cat │
+╞═════╡
+│ x │
+│ y │
+└─────┘
+
numeric()
+
+Select numeric columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that selects numeric +dtypes and multiplies each value by 2:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.numeric() * 2)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ a c
+0 2 8.2
+1 4 4.6
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ c │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 8.2 │
+│ 4 ┆ 4.6 │
+└─────┴─────┘
+
string()
+
+Select string columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select string +dtypes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.string())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ b
+0 x
+1 y
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ b │
+│ --- │
+│ str │
+╞═════╡
+│ x │
+│ y │
+└─────┘
+
narwhals.Series
Narwhals Series, backed by a native series.
+The native series might be pandas.Series, polars.Series, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
, making sure to pass allow_series=True
or
+series_only=True
.
dtype: DType
+
+
+ property
+
+
+Get the data type of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> nw.dtypes.DType:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dtype
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+Int64
+>>> my_library_agnostic_function(s_pl)
+Int64
+
implementation: Implementation
+
+
+ property
+
+
+Return implementation of native Series.
+This can be useful when you need to do some special-casing for +some libraries for features outside of Narwhals' scope - for +example, when dealing with pandas' Period Dtype.
+ + +Returns:
+Type | +Description | +
---|---|
+ Implementation
+ |
+
+
+
+ Implementation. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> s_native = pd.Series([1, 2, 3])
+>>> s = nw.from_native(s_native, series_only=True)
+>>> s.implementation
+<Implementation.PANDAS: 1>
+>>> s.implementation.is_pandas()
+True
+>>> s.implementation.is_pandas_like()
+True
+>>> s.implementation.is_polars()
+False
+
name: str
+
+
+ property
+
+
+Get the name of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="foo")
+>>> s_pl = pl.Series("foo", s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries) -> str:
+... s = nw.from_native(s_native, series_only=True)
+... return s.name
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+'foo'
+>>> my_library_agnostic_function(s_pl)
+'foo'
+
shape: tuple[int]
+
+
+ property
+
+
+Get the shape of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries) -> tuple[int]:
+... s = nw.from_native(s_native, series_only=True)
+... return s.shape
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+(3,)
+>>> my_library_agnostic_function(s_pl)
+(3,)
+
__arrow_c_stream__(requested_schema=None)
+
+Export a Series via the Arrow PyCapsule Interface.
+Narwhals doesn't implement anything itself here:
+to_arrow
and then defer to PyArrow's implementation. See PyCapsule Interface +for more.
+ +__getitem__(idx)
+
+Retrieve elements from the object using integer indexing or slicing.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ idx
+ |
+
+ int | slice | Sequence[int]
+ |
+
+
+
+ The index, slice, or sequence of indices to retrieve. +
|
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any | Self
+ |
+
+
+
+ A single element if |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> from typing import Any
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+>>> s_pa = pa.chunked_array([s])
+
We define a library agnostic function:
+>>> def agnostic_get_first_item(s_native: IntoSeriesT) -> Any:
+... s = nw.from_native(s_native, series_only=True)
+... return s[0]
+
We can then pass either pandas, Polars, or any supported library:
+>>> agnostic_get_first_item(s_pd)
+np.int64(1)
+>>> agnostic_get_first_item(s_pl)
+1
+>>> agnostic_get_first_item(s_pa)
+1
+
We can also make a function to slice the Series:
+>>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s[:2].to_native()
+
>>> agnostic_slice(s_pd)
+0 1
+1 2
+dtype: int64
+>>> agnostic_slice(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 1
+ 2
+]
+>>> agnostic_slice(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2
+ ]
+]
+
__iter__()
+
+abs()
+
+Calculate the absolute value of each element.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [2, -4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.abs().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 2
+1 4
+2 3
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 3
+]
+
alias(name)
+
+Rename the Series.
+ + +This method is very cheap, but does not guarantee that data +will be copied. For example:
+s1: nw.Series
+s2 = s1.alias("foo")
+arr = s2.to_numpy()
+arr[0] = 999
+
may (depending on the backend, and on the version) result in
+s1
's data being modified. We recommend:
- if you need to alias an object and don't need the original
+ one around any more, just use `alias` without worrying about it.
+- if you were expecting `alias` to copy data, then explicitly call
+ `.clone` before calling `alias`.
+
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="foo")
+>>> s_pl = pl.Series("foo", s)
+>>> s_pa = pa.chunked_array([s])
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.alias("bar").to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow:
+>>> my_library_agnostic_function(s_pd)
+0 1
+1 2
+2 3
+Name: bar, dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: 'bar' [i64]
+[
+ 1
+ 2
+ 3
+]
+>>> my_library_agnostic_function(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 1,
+ 2,
+ 3
+ ]
+]
+
all()
+
+Return whether all values in the Series are True.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [True, False, True]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.all()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+np.False_
+>>> my_library_agnostic_function(s_pl)
+False
+
any()
+
+Return whether any of the values in the Series are True.
+ + +Only works on Series of data type Boolean.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [False, True, False]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.any()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+np.True_
+>>> my_library_agnostic_function(s_pl)
+True
+
arg_max()
+
+Returns the index of the maximum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+>>> s_pa = pa.chunked_array([s])
+
We define a library agnostic function:
+>>> def agnostic_arg_max(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.arg_max()
+
We can then pass any supported library such as pandas, Polars, +or PyArrow:
+>>> agnostic_arg_max(s_pd)
+np.int64(2)
+>>> agnostic_arg_max(s_pl)
+2
+>>> agnostic_arg_max(s_pa)
+2
+
arg_min()
+
+Returns the index of the minimum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+>>> s_pa = pa.chunked_array([s])
+
We define a library agnostic function:
+>>> def agnostic_arg_min(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.arg_min()
+
We can then pass any supported library such as pandas, Polars, +or PyArrow:
+>>> agnostic_arg_min(s_pd)
+np.int64(0)
+>>> agnostic_arg_min(s_pl)
+0
+>>> agnostic_arg_min(s_pa)
+0
+
arg_true()
+
+Find elements where boolean Series is True.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [1, None, None, 2]
+>>> s_pd = pd.Series(data, name="a")
+>>> s_pl = pl.Series("a", data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_null().arg_true().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+1 1
+2 2
+Name: a, dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: 'a' [u32]
+[
+ 1
+ 2
+]
+
cast(dtype)
+
+Cast between data types.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtype
+ |
+
+ DType | type[DType]
+ |
+
+
+
+ Data type that the object will be cast into. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [True, False, True]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cast(nw.Int64).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 1
+1 0
+2 1
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 0
+ 1
+]
+
clip(lower_bound=None, upper_bound=None)
+
+Clip values in the Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any | None
+ |
+
+
+
+ Lower bound value. + |
+
+ None
+ |
+
+ upper_bound
+ |
+
+ Any | None
+ |
+
+
+
+ Upper bound value. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>>
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def clip_lower(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.clip(2).to_native()
+
We can then pass either pandas or Polars to clip_lower
:
>>> clip_lower(s_pd)
+0 2
+1 2
+2 3
+dtype: int64
+>>> clip_lower(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 2
+ 3
+]
+
We define another library agnostic function:
+>>> def clip_upper(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.clip(upper_bound=2).to_native()
+
We can then pass either pandas or Polars to clip_upper
:
>>> clip_upper(s_pd)
+0 1
+1 2
+2 2
+dtype: int64
+>>> clip_upper(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 2
+]
+
We can have both at the same time
+>>> s = [-1, 1, -3, 3, -5, 5]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.clip(-1, 3).to_native()
+
We can pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 -1
+1 1
+2 -1
+3 3
+4 -1
+5 3
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (6,)
+Series: '' [i64]
+[
+ -1
+ 1
+ -1
+ 3
+ -1
+ 3
+]
+
count()
+
+Returns the number of non-null elements in the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.count()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+np.int64(3)
+>>> my_library_agnostic_function(s_pl)
+3
+
cum_count(*, reverse=False)
+
+Return the cumulative count of the non-null values in the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = ["x", "k", None, "d"]
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_count(reverse=True).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(pd.Series(data))
+0 3
+1 2
+2 1
+3 1
+dtype: int64
+>>> my_library_agnostic_function(pl.Series(data))
+shape: (4,)
+Series: '' [u32]
+[
+ 3
+ 2
+ 1
+ 1
+]
+>>> my_library_agnostic_function(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 3,
+ 2,
+ 1,
+ 1
+ ]
+]
+
cum_max(*, reverse=False)
+
+Return the cumulative max of the non-null values in the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [1, 3, None, 2]
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_max().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(pd.Series(data))
+0 1.0
+1 3.0
+2 NaN
+3 3.0
+dtype: float64
+>>> my_library_agnostic_function(pl.Series(data))
+shape: (4,)
+Series: '' [i64]
+[
+ 1
+ 3
+ null
+ 3
+]
+>>> my_library_agnostic_function(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 3,
+ null,
+ 3
+ ]
+]
+
cum_min(*, reverse=False)
+
+Return the cumulative min of the non-null values in the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [3, 1, None, 2]
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_min().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(pd.Series(data))
+0 3.0
+1 1.0
+2 NaN
+3 1.0
+dtype: float64
+>>> my_library_agnostic_function(pl.Series(data))
+shape: (4,)
+Series: '' [i64]
+[
+ 3
+ 1
+ null
+ 1
+]
+>>> my_library_agnostic_function(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 3,
+ 1,
+ null,
+ 1
+ ]
+]
+
cum_prod(*, reverse=False)
+
+Return the cumulative product of the non-null values in the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [1, 3, None, 2]
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_prod().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(pd.Series(data))
+0 1.0
+1 3.0
+2 NaN
+3 6.0
+dtype: float64
+>>> my_library_agnostic_function(pl.Series(data))
+shape: (4,)
+Series: '' [i64]
+[
+ 1
+ 3
+ null
+ 6
+]
+>>> my_library_agnostic_function(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 3,
+ null,
+ 6
+ ]
+]
+
cum_sum(*, reverse=False)
+
+Calculate the cumulative sum.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_sum().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 2
+1 6
+2 9
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 6
+ 9
+]
+
diff()
+
+Calculate the difference with the previous element, for each element.
+ + +pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in a Int64 column, you could
+do:
s.diff().fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.diff().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 NaN
+1 2.0
+2 -1.0
+dtype: float64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ -1
+]
+
drop_nulls()
+
+Drop all null values.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s_pd = pd.Series([2, 4, None, 3, 5])
+>>> s_pl = pl.Series("a", [2, 4, None, 3, 5])
+
Now define a dataframe-agnostic function that takes the Series
 to evaluate:
>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.drop_nulls().to_native()
+
Then we can pass either Series (polars or pandas) to func
:
>>> my_library_agnostic_function(s_pd)
+0 2.0
+1 4.0
+3 3.0
+4 5.0
+dtype: float64
+>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: 'a' [i64]
+[
+ 2
+ 4
+ 3
+ 5
+]
+
ewm_mean(*, com=None, span=None, half_life=None, alpha=None, adjust=True, min_periods=1, ignore_nulls=False)
+
+Compute exponentially-weighted moving average.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ com
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of center of mass, \(\gamma\), with |
+
+ None
+ |
+
+ span
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of span, \(\theta\), with |
+
+ None
+ |
+
+ half_life
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of half-life, \(\tau\), with |
+
+ None
+ |
+
+ alpha
+ |
+
+ float | None
+ |
+
+
+
+ Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\). + |
+
+ None
+ |
+
+ adjust
+ |
+
+ bool
+ |
+
+
+
+ Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings +
|
+
+ True
+ |
+
+ min_periods
+ |
+
+ int
+ |
+
+
+
+ Minimum number of observations in window required to have a value (otherwise result is null). + |
+
+ 1
+ |
+
+ ignore_nulls
+ |
+
+ bool
+ |
+
+
+
+ Ignore missing values when calculating weights. +
|
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ Series + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(name="a", data=data)
+>>> s_pl = pl.Series(name="a", values=data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.ewm_mean(com=1, ignore_nulls=False).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 1.000000
+1 1.666667
+2 2.428571
+Name: a, dtype: float64
+
>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: 'a' [f64]
+[
+ 1.0
+ 1.666667
+ 2.428571
+]
+
fill_null(value=None, strategy=None, limit=None)
+
+Fill null values using the specified value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any | None
+ |
+
+
+
+ Value used to fill null values. + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['forward', 'backward'] | None
+ |
+
+
+
+ Strategy used to fill null values. + |
+
+ None
+ |
+
+ limit
+ |
+
+ int | None
+ |
+
+
+
+ Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. + |
+
+ None
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [1, 2, None]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.fill_null(5).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 1.0
+1 2.0
+2 5.0
+dtype: float64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 5
+]
+
Using a strategy:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.fill_null(strategy="forward", limit=1).to_native()
+
>>> my_library_agnostic_function(s_pd)
+0 1.0
+1 2.0
+2 2.0
+dtype: float64
+
>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 2
+]
+
filter(other)
+
+Filter elements in the Series based on a condition.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [4, 10, 15, 34, 50]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.filter(s > 10).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+2 15
+3 34
+4 50
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 15
+ 34
+ 50
+]
+
gather_every(n, offset=0)
+
+Take every nth value in the Series and return as new Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, 3, 4]
+>>> s_pd = pd.Series(name="a", data=data)
+>>> s_pl = pl.Series(name="a", values=data)
+
Let's define a dataframe-agnostic function in which we gather every 2 rows, +starting from an offset of 1:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.gather_every(n=2, offset=1).to_native()
+
>>> my_library_agnostic_function(s_pd)
+1 2
+3 4
+Name: a, dtype: int64
+
>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 2
+ 4
+]
+
head(n=10)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(10))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that returns the first 3 rows:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.head(3).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 0
+1 1
+2 2
+dtype: int64
+
>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 0
+ 1
+ 2
+]
+
is_between(lower_bound, upper_bound, closed='both')
+
+Get a boolean mask of the values that are between the given lower/upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any
+ |
+
+
+
+ Lower bound value. + |
+ + required + | +
+ upper_bound
+ |
+
+ Any
+ |
+
+
+
+ Upper bound value. + |
+ + required + | +
+ closed
+ |
+
+ str
+ |
+
+
+
+ Define which sides of the interval are closed (inclusive). + |
+
+ 'both'
+ |
+
If the value of the lower_bound
is greater than that of the upper_bound
,
+then the values will be False, as no value can satisfy the condition.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s_pd = pd.Series([1, 2, 3, 4, 5])
+>>> s_pl = pl.Series([1, 2, 3, 4, 5])
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_between(2, 4, "right").to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 False
+1 False
+2 True
+3 True
+4 False
+dtype: bool
+>>> my_library_agnostic_function(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ true
+ false
+]
+
is_duplicated()
+
+Get a mask of all duplicated rows in the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 2, 3, 1])
+>>> s_pl = pl.Series([1, 2, 3, 1])
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_duplicated().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 True
+1 False
+2 False
+3 True
+dtype: bool
+>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ true
+ false
+ false
+ true
+]
+
is_empty()
+
+Check if the series is empty.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that keeps only the values that +are greater than 10, and then checks if the result is empty or not:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.filter(s > 10).is_empty()
+
We can then pass either pandas or Polars to func
:
>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+>>> my_library_agnostic_function(s_pd), my_library_agnostic_function(s_pl)
+(True, True)
+
>>> s_pd = pd.Series([100, 2, 3])
+>>> s_pl = pl.Series([100, 2, 3])
+>>> my_library_agnostic_function(s_pd), my_library_agnostic_function(s_pl)
+(False, False)
+
is_finite()
+
+Returns a boolean Series indicating which values are finite.
+ + +Different backends handle null values differently. is_finite
 will return
+False for NaN and null values in the Dask and pandas non-nullable backends, while
+for Polars, PyArrow and pandas nullable backends null values are kept as such.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ Expression of |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [float("nan"), float("inf"), 2.0, None]
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_finite().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(pd.Series(data))
+0 False
+1 False
+2 True
+3 False
+dtype: bool
+
>>> my_library_agnostic_function(
+... pl.Series(data)
+... )
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ null
+]
+
>>> my_library_agnostic_function(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ false,
+ true,
+ null
+ ]
+]
+
is_first_distinct()
+
+Return a boolean mask indicating the first occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2])
+>>> s_pl = pl.Series([1, 1, 2, 3, 2])
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_first_distinct().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 True
+1 False
+2 True
+3 True
+4 False
+dtype: bool
+
>>> my_library_agnostic_function(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ true
+ false
+ true
+ true
+ false
+]
+
is_in(other)
+
+Check if the elements of this Series are in the other sequence.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Any
+ |
+
+
+
+ Sequence of primitive type. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_in([3, 2, 8]).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 False
+1 True
+2 True
+dtype: bool
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+]
+
is_last_distinct()
+
+Return a boolean mask indicating the last occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2])
+>>> s_pl = pl.Series([1, 1, 2, 3, 2])
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_last_distinct().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 False
+1 True
+2 False
+3 True
+4 True
+dtype: bool
+
>>> my_library_agnostic_function(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ true
+ false
+ true
+ true
+]
+
is_null()
+
+Returns a boolean Series indicating which values are null.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [1, 2, None]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_null().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 False
+1 False
+2 True
+dtype: bool
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+]
+
is_sorted(*, descending=False)
+
+Check if the Series is sorted.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Check if the Series is sorted in descending order. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> import pandas as pd
+>>> import polars as pl
+>>> unsorted_data = [1, 3, 2]
+>>> sorted_data = [3, 2, 1]
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(
+... s_native: IntoSeries, descending: bool = False
+... ):
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_sorted(descending=descending)
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(pl.Series(unsorted_data))
+False
+>>> my_library_agnostic_function(pl.Series(sorted_data), descending=True)
+True
+>>> my_library_agnostic_function(pd.Series(unsorted_data))
+False
+>>> my_library_agnostic_function(pd.Series(sorted_data), descending=True)
+True
+
is_unique()
+
+Get a mask of all unique rows in the Series.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 2, 3, 1])
+>>> s_pl = pl.Series([1, 2, 3, 1])
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_unique().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 False
+1 True
+2 True
+3 False
+dtype: bool
+
>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+ false
+]
+
item(index=None)
+
+Return the Series as a scalar, or return the element at the given index.
+If no index is provided, this is equivalent to s[0]
, with a check
+that the shape is (1,). With an index, this is equivalent to s[index]
.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that returns the item at the given index:
+>>> def my_library_agnostic_function(s_native: IntoSeries, index=None):
+... s = nw.from_native(s_native, series_only=True)
+... return s.item(index)
+
We can then pass either pandas or Polars to func
:
>>> (
+... my_library_agnostic_function(pl.Series("a", [1]), None),
+... my_library_agnostic_function(pd.Series([1]), None),
+... )
+(1, np.int64(1))
+
>>> (
+... my_library_agnostic_function(pl.Series("a", [9, 8, 7]), -1),
+... my_library_agnostic_function(pl.Series([9, 8, 7]), -2),
+... )
+(7, 8)
+
len()
+
+Return the number of elements in the Series.
+Null values count towards the total.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that computes the len of the series:
+>>> def my_library_agnostic_function(s_native: IntoSeries) -> int:
+... s = nw.from_native(s_native, series_only=True)
+... return s.len()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+3
+>>> my_library_agnostic_function(s_pl)
+3
+
max()
+
+Get the maximum value in this Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.max()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+np.int64(3)
+>>> my_library_agnostic_function(s_pl)
+3
+
mean()
+
+Reduce this Series to the mean value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.mean()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+np.float64(2.0)
+>>> my_library_agnostic_function(s_pl)
+2.0
+
median()
+
+Reduce this Series to the median value.
+ + +Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [5, 3, 8]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+>>> s_pa = pa.chunked_array([s])
+
Let's define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.median()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(s_pd)
+np.float64(5.0)
+>>> my_library_agnostic_function(s_pl)
+5.0
+>>> my_library_agnostic_function(s_pa)
+5.0
+
min()
+
+Get the minimal value in this Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.min()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+np.int64(1)
+>>> my_library_agnostic_function(s_pl)
+1
+
mode()
+
+Compute the most occurring value(s).
+Can return multiple values.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 1, 2, 2, 3]
+>>> s_pd = pd.Series(name="a", data=data)
+>>> s_pl = pl.Series(name="a", values=data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.mode().sort().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 1
+1 2
+Name: a, dtype: int64
+
>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+
n_unique()
+
+Count the number of unique values.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.n_unique()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+3
+>>> my_library_agnostic_function(s_pl)
+3
+
null_count()
+
+Count the number of null values in the Series.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, None, 3])
+>>> s_pl = pl.Series([1, None, None])
+
Let's define a dataframe-agnostic function that returns the null count of +the series:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.null_count()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+np.int64(1)
+>>> my_library_agnostic_function(s_pl)
+2
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s_pd = pd.Series([1, 2, 3, 4])
+>>> s_pl = pl.Series([1, 2, 3, 4])
+
Let's define a function to pipe into
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.pipe(lambda x: x + 2).to_native()
+
Now apply it to the series
+>>> my_library_agnostic_function(s_pd)
+0 3
+1 4
+2 5
+3 6
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ 3
+ 4
+ 5
+ 6
+]
+
quantile(quantile, interpolation)
+
+Get quantile value of the series.
+ + +pandas and Polars may have implementation differences for a given interpolation method.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ quantile
+ |
+
+ float
+ |
+
+
+
+ Quantile between 0.0 and 1.0. + |
+ + required + | +
+ interpolation
+ |
+
+ Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
+ |
+
+
+
+ Interpolation method. + |
+ + required + | +
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(50))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return [
+... s.quantile(quantile=q, interpolation="nearest")
+... for q in (0.1, 0.25, 0.5, 0.75, 0.9)
+... ]
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+[np.int64(5), np.int64(12), np.int64(24), np.int64(37), np.int64(44)]
+
>>> my_library_agnostic_function(s_pl)
+[5.0, 12.0, 25.0, 37.0, 44.0]
+
rename(name)
+
+Rename the Series.
+Alias for Series.alias()
.
This method is very cheap, but does not guarantee that data +will be copied. For example:
+s1: nw.Series
+s2 = s1.rename("foo")
+arr = s2.to_numpy()
+arr[0] = 999
+
may (depending on the backend, and on the version) result in
+s1
's data being modified. We recommend:
- if you need to rename an object and don't need the original
+ one around any more, just use `rename` without worrying about it.
+- if you were expecting `rename` to copy data, then explicitly call
+ `.clone` before calling `rename`.
+
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="foo")
+>>> s_pl = pl.Series("foo", s)
+>>> s_pa = pa.chunked_array([s])
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rename("bar").to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow:
+>>> my_library_agnostic_function(s_pd)
+0 1
+1 2
+2 3
+Name: bar, dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: 'bar' [i64]
+[
+ 1
+ 2
+ 3
+]
+>>> my_library_agnostic_function(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 1,
+ 2,
+ 3
+ ]
+]
+
replace_strict(old, new=None, *, return_dtype=None)
+
+Replace all values by different values.
+This function must replace all non-null input values (else it raises an error).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ old
+ |
+
+ Sequence[Any] | Mapping[Any, Any]
+ |
+
+
+
+ Sequence of values to replace. It also accepts a mapping of values to
+their replacement as syntactic sugar for
+ |
+ + required + | +
+ new
+ |
+
+ Sequence[Any] | None
+ |
+
+
+
+ Sequence of values to replace by. Length must match the length of |
+
+ None
+ |
+
+ return_dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ The data type of the resulting expression. If set to |
+
+ None
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> df_pd = pd.DataFrame({"a": [3, 0, 1, 2]})
+>>> df_pl = pl.DataFrame({"a": [3, 0, 1, 2]})
+>>> df_pa = pa.table({"a": [3, 0, 1, 2]})
+
Let's define dataframe-agnostic functions:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.replace_strict(
+... [0, 1, 2, 3], ["zero", "one", "two", "three"], return_dtype=nw.String
+... ).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> my_library_agnostic_function(df_pd["a"])
+0 three
+1 zero
+2 one
+3 two
+Name: a, dtype: object
+>>> my_library_agnostic_function(df_pl["a"])
+shape: (4,)
+Series: 'a' [str]
+[
+ "three"
+ "zero"
+ "one"
+ "two"
+]
+>>> my_library_agnostic_function(df_pa["a"])
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "three",
+ "zero",
+ "one",
+ "two"
+ ]
+]
+
rolling_mean(window_size, *, min_periods=None, center=False)
+
+Apply a rolling mean (moving mean) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their mean.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [1.0, 2.0, 3.0, 4.0]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_rolling_mean(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rolling_mean(window_size=2).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> agnostic_rolling_mean(s_pd)
+0 NaN
+1 1.5
+2 2.5
+3 3.5
+dtype: float64
+
>>> agnostic_rolling_mean(s_pl)
+shape: (4,)
+Series: '' [f64]
+[
+ null
+ 1.5
+ 2.5
+ 3.5
+]
+
>>> agnostic_rolling_mean(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ null,
+ 1.5,
+ 2.5,
+ 3.5
+ ]
+]
+
rolling_sum(window_size, *, min_periods=None, center=False)
+
+Apply a rolling sum (moving sum) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their sum.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [1.0, 2.0, 3.0, 4.0]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_rolling_sum(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rolling_sum(window_size=2).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> agnostic_rolling_sum(s_pd)
+0 NaN
+1 3.0
+2 5.0
+3 7.0
+dtype: float64
+
>>> agnostic_rolling_sum(s_pl)
+shape: (4,)
+Series: '' [f64]
+[
+ null
+ 3.0
+ 5.0
+ 7.0
+]
+
>>> agnostic_rolling_sum(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ null,
+ 3,
+ 5,
+ 7
+ ]
+]
+
round(decimals=0)
+
+Round underlying floating point data by decimals
digits.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ decimals
+ |
+
+ int
+ |
+
+
+
+ Number of decimals to round by. + |
+
+ 0
+ |
+
For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
+pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and +4.5 to 4.0, etc.).
+Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1.12345, 2.56789, 3.901234]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that rounds to the first decimal:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.round(1).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 1.1
+1 2.6
+2 3.9
+dtype: float64
+
>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [f64]
+[
+ 1.1
+ 2.6
+ 3.9
+]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample randomly from this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
The sample
method returns a Series with a specified number of
+randomly selected items chosen from this Series.
+The results are not consistent across libraries.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+
>>> s_pd = pd.Series([1, 2, 3, 4])
+>>> s_pl = pl.Series([1, 2, 3, 4])
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.sample(fraction=1.0, with_replacement=True).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+ a
+2 3
+1 2
+3 4
+3 4
+>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ 1
+ 4
+ 3
+ 4
+]
+
scatter(indices, values)
+
+Set value(s) at given position(s).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ indices
+ |
+
+ int | Sequence[int]
+ |
+
+
+
+ Position(s) to set items at. + |
+ + required + | +
+ values
+ |
+
+ Any
+ |
+
+
+
+ Values to set. + |
+ + required + | +
This method always returns a new Series, without modifying the original one. +Using this function in a for-loop is an anti-pattern; we recommend building +up your positions and values beforehand and doing an update in one go.
+For example, instead of
+for i in [1, 3, 2]:
+ value = some_function(i)
+ s = s.scatter(i, value)
+
prefer
+positions = [1, 3, 2]
+values = [some_function(x) for x in positions]
+s = s.scatter(positions, values)
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(df["a"].scatter([0, 1], [999, 888])).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(df_pd)
+ a b
+0 999 4
+1 888 5
+2 3 6
+>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 999 ┆ 4 │
+│ 888 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+
shift(n)
+
+Shift values by n
positions.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of indices to shift forward. If a negative value is passed, +values are shifted in the opposite direction instead. + |
+ + required + | +
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to shift
+and fill missing values with 0
in an Int64 column, you could
+do:
s.shift(1).fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.shift(1).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 NaN
+1 2.0
+2 4.0
+dtype: float64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ 4
+]
+
sort(*, descending=False, nulls_last=False)
+
+Sort this Series. Place null values first.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Sort in descending order. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last instead of first. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [5, None, 1, 2]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define library agnostic functions:
+>>> def agnostic_sort(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.sort().to_native()
+
>>> def agnostic_sort_descending(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.sort(descending=True).to_native()
+
We can then pass either pandas or Polars to agnostic_sort
:
>>> agnostic_sort(s_pd)
+1 NaN
+2 1.0
+3 2.0
+0 5.0
+dtype: float64
+>>> agnostic_sort(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ null
+ 1
+ 2
+ 5
+]
+>>> agnostic_sort_descending(s_pd)
+1 NaN
+0 5.0
+3 2.0
+2 1.0
+dtype: float64
+>>> agnostic_sort_descending(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ null
+ 5
+ 2
+ 1
+]
+
skew()
+
+Calculate the sample skewness of the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The sample skewness of the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> s = [1, 1, 2, 10, 100]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+>>> s_pa = pa.array(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.skew()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(s_pd)
+np.float64(1.4724267269058975)
+>>> func(s_pl)
+1.4724267269058975
+
The skewness is a measure of the asymmetry of the probability distribution. +A perfectly symmetric distribution has a skewness of 0.
+std(*, ddof=1)
+
+Get the standard deviation of this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ ddof
+ |
+
+ int
+ |
+
+
+
+ “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, + where N represents the number of elements. + |
+
+ 1
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.std()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+np.float64(1.0)
+>>> my_library_agnostic_function(s_pl)
+1.0
+
sum()
+
+Reduce this Series to the sum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.sum()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+np.int64(6)
+>>> my_library_agnostic_function(s_pl)
+6
+
tail(n=10)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(10))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that returns the last 3 rows:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.tail(3).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+7 7
+8 8
+9 9
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 7
+ 8
+ 9
+]
+
to_arrow()
+
+Convert to arrow.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> import pyarrow as pa
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, 3, 4]
+>>> s_pd = pd.Series(name="a", data=data)
+>>> s_pl = pl.Series(name="a", values=data)
+
Let's define a dataframe-agnostic function that converts to arrow:
+>>> def my_library_agnostic_function(s_native: IntoSeries) -> pa.Array:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_arrow()
+
>>> my_library_agnostic_function(s_pd)
+<pyarrow.lib.Int64Array object at ...>
+[
+ 1,
+ 2,
+ 3,
+ 4
+]
+
>>> my_library_agnostic_function(s_pl)
+<pyarrow.lib.Int64Array object at ...>
+[
+ 1,
+ 2,
+ 3,
+ 4
+]
+
to_dummies(*, separator='_', drop_first=False)
+
+Get dummy/indicator variables.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ separator
+ |
+
+ str
+ |
+
+
+
+ Separator/delimiter used when generating column names. + |
+
+ '_'
+ |
+
+ drop_first
+ |
+
+ bool
+ |
+
+
+
+ Remove the first category from the variable being encoded. + |
+
+ False
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries, IntoDataFrame
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data, name="a")
+>>> s_pl = pl.Series("a", data)
+
Let's define a dataframe-agnostic function that gets dummy/indicator variables:
+>>> def my_library_agnostic_function(
+... s_native: IntoSeries, drop_first: bool = False
+... ) -> IntoDataFrame:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_dummies(drop_first=drop_first).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+ a_1 a_2 a_3
+0 1 0 0
+1 0 1 0
+2 0 0 1
+
>>> my_library_agnostic_function(s_pd, drop_first=True)
+ a_2 a_3
+0 0 0
+1 1 0
+2 0 1
+
>>> my_library_agnostic_function(s_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a_1 ┆ a_2 ┆ a_3 │
+│ --- ┆ --- ┆ --- │
+│ i8 ┆ i8 ┆ i8 │
+╞═════╪═════╪═════╡
+│ 1 ┆ 0 ┆ 0 │
+│ 0 ┆ 1 ┆ 0 │
+│ 0 ┆ 0 ┆ 1 │
+└─────┴─────┴─────┘
+>>> my_library_agnostic_function(s_pl, drop_first=True)
+shape: (3, 2)
+┌─────┬─────┐
+│ a_2 ┆ a_3 │
+│ --- ┆ --- │
+│ i8 ┆ i8 │
+╞═════╪═════╡
+│ 0 ┆ 0 │
+│ 1 ┆ 0 │
+│ 0 ┆ 1 │
+└─────┴─────┘
+
to_frame()
+
+Convert to dataframe.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries, IntoDataFrame
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries) -> IntoDataFrame:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_frame().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+ a
+0 1
+1 2
+2 3
+>>> my_library_agnostic_function(s_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
to_list()
+
+Convert to list.
+ + +This function converts to Python scalars. It's typically +more efficient to keep your data in the format native to +your original dataframe, so we recommend only calling this +when you absolutely need to.
+Returns:
+Type | +Description | +
---|---|
+ list[Any]
+ |
+
+
+
+ A list of Python objects. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_list()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+[1, 2, 3]
+>>> my_library_agnostic_function(s_pl)
+[1, 2, 3]
+
to_numpy()
+
+Convert to numpy.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> import numpy as np
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries) -> np.ndarray:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_numpy()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+array([1, 2, 3]...)
+>>> my_library_agnostic_function(s_pl)
+array([1, 2, 3]...)
+
to_pandas()
+
+Convert to pandas.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries) -> pd.Series:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_pandas()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 1
+1 2
+2 3
+Name: a, dtype: int64
+>>> my_library_agnostic_function(s_pl)
+0 1
+1 2
+2 3
+Name: a, dtype: int64
+
to_native()
+
+Convert Narwhals series to native series.
+ + +Returns:
+Type | +Description | +
---|---|
+ IntoSeriesT
+ |
+
+
+
+ Series of class that user started with. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 1
+1 2
+2 3
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 3
+]
+
unique(*, maintain_order=False)
+
+Returns unique values of the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original series. This may be more
+expensive to compute. Setting this to True blocks the possibility to run on the streaming engine for Polars. |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> s = [2, 4, 4, 6]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.unique(maintain_order=True).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 2
+1 4
+2 6
+dtype: int64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 6
+]
+
value_counts(*, sort=False, parallel=False, name=None, normalize=False)
+
+Count the occurrences of unique values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ sort
+ |
+
+ bool
+ |
+
+
+
+ Sort the output by count in descending order. If set to False (default), +the order of the output is random. + |
+
+ False
+ |
+
+ parallel
+ |
+
+ bool
+ |
+
+
+
+ Execute the computation in parallel. Used for Polars only. + |
+
+ False
+ |
+
+ name
+ |
+
+ str | None
+ |
+
+
+
+ Give the resulting count column a specific name; if normalize is True it defaults to "proportion", otherwise it defaults to "count". |
+
+ None
+ |
+
+ normalize
+ |
+
+ bool
+ |
+
+
+
+ If True, gives relative frequencies of the unique values. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries, IntoDataFrame
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2], name="s")
+>>> s_pl = pl.Series(values=[1, 1, 2, 3, 2], name="s")
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeries) -> IntoDataFrame:
+... s = nw.from_native(s_native, series_only=True)
+... return s.value_counts(sort=True).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+ s count
+0 1 2
+1 2 2
+2 3 1
+
>>> my_library_agnostic_function(s_pl)
+shape: (3, 2)
+┌─────┬───────┐
+│ s ┆ count │
+│ --- ┆ --- │
+│ i64 ┆ u32 │
+╞═════╪═══════╡
+│ 1 ┆ 2 │
+│ 2 ┆ 2 │
+│ 3 ┆ 1 │
+└─────┴───────┘
+
zip_with(mask, other)
+
+Take values from self or other based on the given mask.
+Where mask evaluates true, take values from self. Where mask evaluates false, +take values from other.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mask
+ |
+
+ Self
+ |
+
+
+
+ Boolean Series + |
+ + required + | +
+ other
+ |
+
+ Self
+ |
+
+
+
+ Series of same type. + |
+ + required + | +
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> s1_pl = pl.Series([1, 2, 3, 4, 5])
+>>> s2_pl = pl.Series([5, 4, 3, 2, 1])
+>>> mask_pl = pl.Series([True, False, True, False, True])
+>>> s1_pd = pd.Series([1, 2, 3, 4, 5])
+>>> s2_pd = pd.Series([5, 4, 3, 2, 1])
+>>> mask_pd = pd.Series([True, False, True, False, True])
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(
+... s1_native: IntoSeriesT, mask_native: IntoSeriesT, s2_native: IntoSeriesT
+... ) -> IntoSeriesT:
+... s1 = nw.from_native(s1_native, series_only=True)
+... mask = nw.from_native(mask_native, series_only=True)
+... s2 = nw.from_native(s2_native, series_only=True)
+... return s1.zip_with(mask, s2).to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(
+... s1_pl, mask_pl, s2_pl
+... )
+shape: (5,)
+Series: '' [i64]
+[
+ 1
+ 4
+ 3
+ 2
+ 5
+]
+>>> my_library_agnostic_function(s1_pd, mask_pd, s2_pd)
+0 1
+1 4
+2 3
+3 2
+4 5
+dtype: int64
+
narwhals.Series.cat
get_categories()
+
+Get unique categories from column.
+ + +Examples:
+Let's create some series:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["apple", "mango", "mango"]
+>>> s_pd = pd.Series(data, dtype="category")
+>>> s_pl = pl.Series(data, dtype=pl.Categorical)
+
We define a dataframe-agnostic function to get unique categories +from the series:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cat.get_categories().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 apple
+1 mango
+dtype: object
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "apple"
+ "mango"
+]
+
narwhals.Series.dt
convert_time_zone(time_zone)
+
+Convert time zone.
+If converting from a time-zone-naive column, then conversion happens +as if converting from UTC.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Examples:
+>>> from datetime import datetime, timezone
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.convert_time_zone("Asia/Kathmandu").to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> my_library_agnostic_function(s_pd)
+0 2024-01-01 05:45:00+05:45
+1 2024-01-02 05:45:00+05:45
+dtype: datetime64[ns, Asia/Kathmandu]
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [datetime[μs, Asia/Kathmandu]]
+[
+ 2024-01-01 05:45:00 +0545
+ 2024-01-02 05:45:00 +0545
+]
+>>> my_library_agnostic_function(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2024-01-01 00:00:00.000000Z,
+ 2024-01-02 00:00:00.000000Z
+ ]
+]
+
date()
+
+Get the date in a datetime series.
+ + +Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If pandas default backend is being used. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]
+>>> s_pd = pd.Series(dates).convert_dtypes(dtype_backend="pyarrow")
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.date().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 2012-01-07
+1 2023-03-10
+dtype: date32[day][pyarrow]
+
>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [date]
+[
+ 2012-01-07
+ 2023-03-10
+]
+
day()
+
+Extracts the day in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [datetime(2022, 1, 1), datetime(2022, 1, 5)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.day().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 1
+1 5
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 1
+ 5
+]
+
hour()
+
+Extracts the hour in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.hour().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 5
+1 9
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 5
+ 9
+]
+
microsecond()
+
+Extracts the microseconds in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [
+... datetime(2023, 5, 21, 12, 55, 10, 400000),
+... datetime(2023, 5, 21, 12, 55, 10, 600000),
+... datetime(2023, 5, 21, 12, 55, 10, 800000),
+... datetime(2023, 5, 21, 12, 55, 11, 0),
+... datetime(2023, 5, 21, 12, 55, 11, 200000),
+... ]
+
>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.microsecond().alias("datetime").to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 400000
+1 600000
+2 800000
+3 0
+4 200000
+Name: datetime, dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (5,)
+Series: 'datetime' [i32]
+[
+ 400000
+ 600000
+ 800000
+ 0
+ 200000
+]
+
millisecond()
+
+Extracts the milliseconds in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [
+... datetime(2023, 5, 21, 12, 55, 10, 400000),
+... datetime(2023, 5, 21, 12, 55, 10, 600000),
+... datetime(2023, 5, 21, 12, 55, 10, 800000),
+... datetime(2023, 5, 21, 12, 55, 11, 0),
+... datetime(2023, 5, 21, 12, 55, 11, 200000),
+... ]
+
>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.millisecond().alias("datetime").to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 400
+1 600
+2 800
+3 0
+4 200
+Name: datetime, dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (5,)
+Series: 'datetime' [i32]
+[
+ 400
+ 600
+ 800
+ 0
+ 200
+]
+
minute()
+
+Extracts the minute in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.minute().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 3
+1 12
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 3
+ 12
+]
+
month()
+
+Gets the month in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [datetime(2023, 2, 1), datetime(2023, 8, 3)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.month().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 2
+1 8
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 2
+ 8
+]
+
nanosecond()
+
+Extracts the nanoseconds in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [
+... datetime(2022, 1, 1, 5, 3, 10, 500000),
+... datetime(2022, 1, 5, 9, 12, 4, 60000),
+... ]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.nanosecond().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 500000000
+1 60000000
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i32]
+[
+ 500000000
+ 60000000
+]
+
ordinal_day()
+
+Get ordinal day.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.ordinal_day().to_native()
+
We can then pass either pandas or Polars to my_library_agnostic_function
:
>>> my_library_agnostic_function(s_pd)
+0 1
+1 216
+dtype: int32
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i16]
+[
+ 1
+ 216
+]
+
replace_time_zone(time_zone)
+
+Replace time zone.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str | None
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Examples:
+>>> from datetime import datetime, timezone
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.replace_time_zone("Asia/Kathmandu").to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> my_library_agnostic_function(s_pd)
+0 2024-01-01 00:00:00+05:45
+1 2024-01-02 00:00:00+05:45
+dtype: datetime64[ns, Asia/Kathmandu]
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [datetime[μs, Asia/Kathmandu]]
+[
+ 2024-01-01 00:00:00 +0545
+ 2024-01-02 00:00:00 +0545
+]
+>>> my_library_agnostic_function(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2023-12-31 18:15:00.000000Z,
+ 2024-01-01 18:15:00.000000Z
+ ]
+]
+
second()
+
+Extracts the seconds in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [datetime(2022, 1, 1, 5, 3, 10), datetime(2022, 1, 5, 9, 12, 4)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.second().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 10
+1 4
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 10
+ 4
+]
+
timestamp(time_unit='us')
+
+Return a timestamp in the given time unit.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['ns', 'us', 'ms']
+ |
+
+
+
+ {'ns', 'us', 'ms'} +Time unit. + |
+
+ 'us'
+ |
+
Examples:
+>>> from datetime import date
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [date(2001, 1, 1), None, date(2001, 1, 3)]
+>>> s_pd = pd.Series(data, dtype="datetime64[ns]")
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.timestamp("ms").to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> my_library_agnostic_function(s_pd)
+0 9.783072e+11
+1 NaN
+2 9.784800e+11
+dtype: float64
+>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 978307200000
+ null
+ 978480000000
+]
+>>> my_library_agnostic_function(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 978307200000,
+ null,
+ 978480000000
+ ]
+]
+
total_microseconds()
+
+Get total microseconds.
+ + +The function outputs the total microseconds as an int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null() in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [
+... timedelta(microseconds=10),
+... timedelta(milliseconds=1, microseconds=200),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.total_microseconds().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 10
+1 1200
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 1200
+]
+
total_milliseconds()
+
+Get total milliseconds.
+ + +The function outputs the total milliseconds as an int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null() in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [
+... timedelta(milliseconds=10),
+... timedelta(milliseconds=20, microseconds=40),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.total_milliseconds().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 10
+1 20
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
total_minutes()
+
+Get total minutes.
+ + +The function outputs the total minutes as an int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null() in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.total_minutes().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 10
+1 20
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
total_nanoseconds()
+
+Get total nanoseconds.
+ + +The function outputs the total nanoseconds as an int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null() in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"]
+>>> s_pd = pd.to_datetime(pd.Series(data))
+>>> s_pl = pl.Series(data).str.to_datetime(time_unit="ns")
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.diff().dt.total_nanoseconds().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 NaN
+1 1.0
+dtype: float64
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ null
+ 1
+]
+
total_seconds()
+
+Get total seconds.
+ + +The function outputs the total seconds as an int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null() in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.total_seconds().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 10
+1 20
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
to_string(format)
+
+Convert a Date/Time/Datetime series into a String series with the given format.
+ + +Unfortunately, different libraries interpret format directives a bit +differently.
+- Chrono, the library used by Polars, uses "%.f" for fractional seconds,
+ whereas pandas and Python stdlib use ".%f".
+- PyArrow interprets "%S" as "seconds, including fractional seconds"
+ whereas most other tools interpret it as "just seconds, as 2 digits".
+Therefore, we make the following adjustments:
+- for pandas-like libraries, we replace "%S.%f" with "%S%.f".
+- for PyArrow, we replace "%S.%f" with "%S".
+Workarounds like these don't make us happy, and we try to avoid them as +much as possible, but here we feel like it's the best compromise.
+If you just want to format a date/datetime Series as a local datetime +string, and have it work as consistently as possible across libraries, +we suggest using:
+- "%Y-%m-%dT%H:%M:%S%.f" for datetimes
+- "%Y-%m-%d" for dates
+though note that, even then, different tools may return a different number +of trailing zeros. Nonetheless, this is probably consistent enough for +most applications.
+If you have an application where this is not enough, please open an issue +and let us know.
+Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [
+... datetime(2020, 3, 1),
+... datetime(2020, 4, 1),
+... datetime(2020, 5, 1),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.to_string("%Y/%m/%d").to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 2020/03/01
+1 2020/04/01
+2 2020/05/01
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [str]
+[
+ "2020/03/01"
+ "2020/04/01"
+ "2020/05/01"
+]
+
year()
+
+Get the year in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> dates = [datetime(2012, 1, 7), datetime(2023, 3, 10)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.year().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 2012
+1 2023
+dtype: int...
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [i32]
+[
+ 2012
+ 2023
+]
+
narwhals.Series.list
len()
+
+Return the number of elements in each list.
+Null values count towards the total.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new series. + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [[1, 2], [3, 4, None], None, []]
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_list_len(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.list.len().to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> agnostic_list_len(
+... pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
+... )
+0 2
+1 3
+2 <NA>
+3 0
+dtype: int32[pyarrow]
+
>>> agnostic_list_len(pl.Series(data))
+shape: (4,)
+Series: '' [u32]
+[
+ 2
+ 3
+ null
+ 0
+]
+
>>> agnostic_list_len(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 3,
+ null,
+ 0
+ ]
+]
+
narwhals.Series.str
contains(pattern, *, literal=False)
+
+Check if string contains a substring that matches a pattern.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A character sequence or valid regular expression pattern. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ If True, treats the pattern as a literal string. + If False, assumes the pattern is a regular expression. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> pets = ["cat", "dog", "rabbit and parrot", "dove", None]
+>>> s_pd = pd.Series(pets)
+>>> s_pl = pl.Series(pets)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.contains("parrot|dove").to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 False
+1 False
+2 True
+3 True
+4 None
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ true
+ null
+]
+
ends_with(suffix)
+
+Check if string values end with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ suffix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.ends_with("ngo").to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 False
+1 True
+2 None
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ true
+ null
+]
+
head(n=5)
+
+Take the first n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is supported (see note (1.)) + |
+
+ 5
+ |
+
Notes:
1. When the n input is negative, head returns characters up to the n-th from the end of the string.
+ For example, if n = -3, then all characters except the last three are returned.
2. If the length of the string has fewer than n characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> lyrics = ["Atatata", "taata", "taatatata", "zukkyun"]
+>>> s_pd = pd.Series(lyrics)
+>>> s_pl = pl.Series(lyrics)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.head().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 Atata
+1 taata
+2 taata
+3 zukky
+dtype: object
+>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "Atata"
+ "taata"
+ "taata"
+ "zukky"
+]
+
len_chars()
+
+Return the length of each string as the number of characters.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["foo", "Café", "345", "東京", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.len_chars().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 3.0
+1 4.0
+2 3.0
+3 2.0
+4 NaN
+dtype: float64
+
>>> my_library_agnostic_function(s_pl)
+shape: (5,)
+Series: '' [u32]
+[
+ 3
+ 4
+ 3
+ 2
+ null
+]
+
replace(pattern, value, *, literal=False, n=1)
+
+Replace first matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string. + |
+
+ False
+ |
+
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of matches to replace. + |
+
+ 1
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["123abc", "abc abc123"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... s = s.str.replace("abc", "")
+... return s.to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 123
+1 abc123
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "123"
+ " abc123"
+]
+
replace_all(pattern, value, *, literal=False)
+
+Replace all matching regex/literal substrings with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["123abc", "abc abc123"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... s = s.str.replace_all("abc", "")
+... return s.to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 123
+1 123
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "123"
+ " 123"
+]
+
slice(offset, length=None)
+
+Create subslices of the string values of a Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ offset
+ |
+
+ int
+ |
+
+
+
+ Start index. Negative indexing is supported. + |
+ + required + | +
+ length
+ |
+
+ int | None
+ |
+
+
+
+ Length of the slice. If set to None (default), the slice is taken to the end of the string. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["pear", None, "papaya", "dragonfruit"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.slice(4, length=3).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0
+1 None
+2 ya
+3 onf
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ ""
+ null
+ "ya"
+ "onf"
+]
+
Using negative indexes:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.slice(-3).to_native()
+
>>> my_library_agnostic_function(s_pd)
+0 ear
+1 None
+2 aya
+3 uit
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "ear"
+ null
+ "aya"
+ "uit"
+]
+
starts_with(prefix)
+
+Check if string values start with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ prefix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.starts_with("app").to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 True
+1 False
+2 None
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ true
+ false
+ null
+]
+
strip_chars(characters=None)
+
+Remove leading and trailing characters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ characters
+ |
+
+ str | None
+ |
+
+
+
+ The set of characters to be removed. All combinations of this set of characters will be stripped from the start and end of the string. If set to None (default), all leading and trailing whitespace is removed instead. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["apple", "\nmango"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... s = s.str.strip_chars()
+... return s.to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 apple
+1 mango
+dtype: object
+
>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "apple"
+ "mango"
+]
+
tail(n=5)
+
+Take the last n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is supported (see note (1.)) + |
+
+ 5
+ |
+
Notes:
1. When the n input is negative, tail returns characters starting from the n-th from the beginning of
+ the string. For example, if n = -3, then all characters except the first three are returned.
2. If the length of the string has fewer than n characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> lyrics = ["Atatata", "taata", "taatatata", "zukkyun"]
+>>> s_pd = pd.Series(lyrics)
+>>> s_pl = pl.Series(lyrics)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.tail().to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(s_pd)
+0 atata
+1 taata
+2 atata
+3 kkyun
+dtype: object
+>>> my_library_agnostic_function(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "atata"
+ "taata"
+ "atata"
+ "kkyun"
+]
+
to_datetime(format=None)
+
+Parse Series with strings to a Series with Datetime dtype.
+ + +pandas defaults to the nanosecond time unit, Polars to microsecond. +Prior to pandas 2.0, nanoseconds were the only time unit supported +in pandas, with no ability to set any other one. Support for +setting the time unit in pandas (when the installed version permits) will be added in the future.
+As different backends auto-infer format in different ways, if format=None
+there is no guarantee that the result will be equal.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str | None
+ |
+
+
+
+ Format to use for conversion. If set to None (default), the format is +inferred from the data. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = ["2020-01-01", "2020-01-02"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.to_datetime(format="%Y-%m-%d").to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow:
+>>> my_library_agnostic_function(s_pd)
+0 2020-01-01
+1 2020-01-02
+dtype: datetime64[ns]
+>>> my_library_agnostic_function(s_pl)
+shape: (2,)
+Series: '' [datetime[μs]]
+[
+ 2020-01-01 00:00:00
+ 2020-01-02 00:00:00
+]
+>>> my_library_agnostic_function(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 2020-01-01 00:00:00.000000,
+ 2020-01-02 00:00:00.000000
+ ]
+]
+
to_lowercase()
+
+Transform string to lowercase variant.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"fruits": ["APPLE", "MANGO", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... lower_col=nw.col("fruits").str.to_lowercase()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ fruits lower_col
+0 APPLE apple
+1 MANGO mango
+2 None None
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ lower_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ APPLE ┆ apple │
+│ MANGO ┆ mango │
+│ null ┆ null │
+└────────┴───────────┘
+
to_uppercase()
+
+Transform string to uppercase variant.
+ + +The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. +For more info see: https://github.com/apache/arrow/issues/34599 +There may be other unicode-edge-case-related variations across implementations.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... upper_col=nw.col("fruits").str.to_uppercase()
+... ).to_native()
+
We can then pass either pandas or Polars to func
:
>>> my_library_agnostic_function(df_pd)
+ fruits upper_col
+0 apple APPLE
+1 mango MANGO
+2 None None
+
>>> my_library_agnostic_function(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ upper_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ apple ┆ APPLE │
+│ mango ┆ MANGO │
+│ null ┆ null │
+└────────┴───────────┘
+
narwhals.typing
Narwhals comes fully statically typed. In addition to nw.DataFrame, nw.Expr,
+nw.Series, nw.LazyFrame, we also provide the following type hints:
DataFrameT = TypeVar('DataFrameT', bound='DataFrame[Any]')
+
+
+ module-attribute
+
+
+TypeVar bound to Narwhals DataFrame.
+Use this if your function can accept a Narwhals DataFrame and returns a Narwhals +DataFrame backed by the same backend.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import DataFrameT
+>>> @nw.narwhalify
+... def func(df: DataFrameT) -> DataFrameT:
+... return df.with_columns(c=df["a"] + 1)
+
Frame: TypeAlias = Union['DataFrame[Any]', 'LazyFrame[Any]']
+
+
+ module-attribute
+
+
+Narwhals DataFrame or Narwhals LazyFrame.
+Use this if your function can work with either and your function doesn't care +about its backend.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import Frame
+>>> @nw.narwhalify
+... def agnostic_columns(df: Frame) -> list[str]:
+... return df.columns
+
FrameT = TypeVar('FrameT', bound='Frame')
+
+
+ module-attribute
+
+
+TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame.
+Use this if your function accepts either nw.DataFrame
or nw.LazyFrame
and returns
+an object of the same kind.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import FrameT
+>>> @nw.narwhalify
+... def agnostic_func(df: FrameT) -> FrameT:
+... return df.with_columns(c=nw.col("a") + 1)
+
IntoDataFrame: TypeAlias = Union['NativeFrame', 'DataFrame[Any]', 'DataFrameLike']
+
+
+ module-attribute
+
+
+Anything which can be converted to a Narwhals DataFrame.
+Use this if your function accepts a narwhalifiable object but doesn't care about its backend.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.shape
+
IntoDataFrameT = TypeVar('IntoDataFrameT', bound='IntoDataFrame')
+
+
+ module-attribute
+
+
+TypeVar bound to object convertible to Narwhals DataFrame.
+Use this if your function accepts an object which can be converted to nw.DataFrame
+and returns an object of the same class.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrameT
+>>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.with_columns(c=df["a"] + 1).to_native()
+
IntoExpr: TypeAlias = Union['Expr', str, 'Series[Any]']
+
+
+ module-attribute
+
+
+Anything which can be converted to an expression.
+Use this to mean "either a Narwhals expression, or something which can be converted
+into one". For example, exprs in DataFrame.select is typed to accept IntoExpr,
+as it can either accept a nw.Expr (e.g. df.select(nw.col('a'))) or a string
+which will be interpreted as a nw.Expr, e.g. df.select('a').
IntoFrame: TypeAlias = Union['NativeFrame', 'DataFrame[Any]', 'LazyFrame[Any]', 'DataFrameLike']
+
+
+ module-attribute
+
+
+Anything which can be converted to a Narwhals DataFrame or LazyFrame.
+Use this if your function can accept an object which can be converted to either
+nw.DataFrame
or nw.LazyFrame
and it doesn't care about its backend.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.collect_schema().names()
+
IntoFrameT = TypeVar('IntoFrameT', bound='IntoFrame')
+
+
+ module-attribute
+
+
+TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame.
+Use this if your function accepts an object which is convertible to nw.DataFrame
+or nw.LazyFrame
and returns an object of the same type.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(c=nw.col("a") + 1).to_native()
+
IntoSeries: TypeAlias = Union['Series[Any]', 'NativeSeries']
+
+
+ module-attribute
+
+
+Anything which can be converted to a Narwhals Series.
+Use this if your function can accept an object which can be converted to nw.Series
+and it doesn't care about its backend.
Examples:
+>>> from typing import Any
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_list()
+
IntoSeriesT = TypeVar('IntoSeriesT', bound='IntoSeries')
+
+
+ module-attribute
+
+
+TypeVar bound to object convertible to Narwhals Series.
+Use this if your function accepts an object which can be converted to nw.Series
+and returns an object of the same class.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.abs().to_native()
+
nw.narwhalify, or nw.from_native?
Although some people find the former more readable, the latter is better at preserving type hints.
+Here's an example: +
import polars as pl
+import narwhals as nw
+from narwhals.typing import IntoDataFrameT, DataFrameT
+
+df = pl.DataFrame({"a": [1, 2, 3]})
+
+
+def func(df: IntoDataFrameT) -> IntoDataFrameT:
+ df = nw.from_native(df, eager_only=True)
+ return nw.to_native(df.select(b=nw.col("a")))
+
+
+reveal_type(func(df))
+
+
+@nw.narwhalify(strict=True)
+def func_2(df: DataFrameT) -> DataFrameT:
+ return df.select(b=nw.col("a"))
+
+
+reveal_type(func_2(df))
+
Running mypy on it gives:
+
$ mypy f.py
+f.py:11: note: Revealed type is "polars.dataframe.frame.DataFrame"
+f.py:17: note: Revealed type is "Any"
+Success: no issues found in 1 source file
+
In the first case, mypy can infer that df is a polars.DataFrame. In the second case, it can't.
If you want to make the most out of type hints and preserve them as much as possible, we recommend
+nw.from_native and nw.to_native. Type hints will still be respected
+inside the function body if you type the arguments.