Skip to content

Commit

Permalink
RFC, feat: add .select (by str) for duckdb and ibis backend (#1266)
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi authored Oct 28, 2024
1 parent 425dbe4 commit 0b333f9
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 1 deletion.
13 changes: 13 additions & 0 deletions docs/extending.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def func(df: FrameT) -> FrameT:
b_std=nw.col("b").std(),
)
```

will work for any of pandas, Polars, cuDF, Modin, and PyArrow.

However, sometimes you don't need to do complex operations on dataframes - all you need
Expand All @@ -57,9 +58,21 @@ def func(df: Any) -> Schema:
df = nw.from_native(df, eager_or_interchange_only=True)
return df.schema
```

is also supported, meaning that, in addition to the libraries mentioned above, you can
also pass Ibis, DuckDB, Vaex, and any library which implements the protocol.

#### Interchange-only support

While libraries for which we have full support can benefit from the whole Narwhals API,
libraries which have interchange-only support can access the following methods after
converting to a Narwhals DataFrame:

- `.schema`, hence column names via `.schema.names()` and column types via `.schema.dtypes()`
- `.to_pandas()` and `.to_arrow()`, for converting to pandas and PyArrow, respectively.
- `.select(names)` (Ibis and DuckDB), where `names` is a list of (string) column names. This is useful for
selecting columns before converting to another library.

### Extending Narwhals

If you want your own library to be recognised too, you're welcome to open a PR (with tests)!
Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Extremely lightweight and extensible compatibility layer between dataframe libra

- **Full API support**: cuDF, Modin, pandas, Polars, PyArrow
- **Lazy-only support**: Dask
- **Interchange-level support**: Ibis, Vaex, anything else which implements the DataFrame Interchange Protocol
- **Interchange-level support**: Ibis, DuckDB, Vaex, anything else which implements the DataFrame Interchange Protocol

Seamlessly support all, without depending on any!

Expand Down
19 changes: 19 additions & 0 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,22 @@ def __getitem__(self, item: str) -> DuckDBInterchangeSeries:
self._native_frame.select(item), dtypes=self._dtypes
)

def select(
    self: Self,
    *exprs: Any,
    **named_exprs: Any,
) -> Self:
    """Select columns by name on the wrapped DuckDB relation.

    Arguments:
        *exprs: Column names to keep; every item must be a `str`.
        **named_exprs: Not supported; passing any keyword argument raises.

    Returns:
        A new frame wrapping the result of the native `select` call.

    Raises:
        NotImplementedError: If keyword arguments are given or any positional
            argument is not a string.
    """
    names_only = not named_exprs and all(isinstance(name, str) for name in exprs)
    if not names_only:  # pragma: no cover
        msg = (
            "`select`-ing not by name is not supported for DuckDB backend.\n\n"
            "If you would like to see this kind of object better supported in "
            "Narwhals, please open a feature request "
            "at https://github.com/narwhals-dev/narwhals/issues."
        )
        raise NotImplementedError(msg)

    # The validated names are forwarded unchanged to the native relation.
    selected = self._native_frame.select(*exprs)
    return self._from_native_frame(selected)

def __getattr__(self, attr: str) -> Any:
if attr == "schema":
return {
Expand Down Expand Up @@ -120,3 +136,6 @@ def to_pandas(self: Self) -> pd.DataFrame:

def to_arrow(self: Self) -> pa.Table:
    """Return the wrapped DuckDB relation converted via its `arrow()` method."""
    arrow_table = self._native_frame.arrow()
    return arrow_table

def _from_native_frame(self: Self, df: Any) -> Self:
    """Wrap `df` in a new instance of this frame's class.

    The current `_dtypes` object is passed through to the new instance.
    """
    frame_cls = self.__class__
    return frame_cls(df, dtypes=self._dtypes)
21 changes: 21 additions & 0 deletions narwhals/_ibis/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,24 @@ def to_pandas(self: Self) -> pd.DataFrame:
def to_arrow(self: Self) -> pa.Table:
    """Return the wrapped Ibis table converted via its `to_pyarrow()` method."""
    arrow_table = self._native_frame.to_pyarrow()
    return arrow_table

def select(
    self: Self,
    *exprs: Any,
    **named_exprs: Any,
) -> Self:
    """Select columns by name on the wrapped Ibis table.

    Arguments:
        *exprs: Column names to keep; every item must be a `str`.
        **named_exprs: Not supported; passing any keyword argument raises.

    Returns:
        A new frame wrapping the result of the native `select` call.

    Raises:
        NotImplementedError: If keyword arguments are given or any positional
            argument is not a string.
    """
    names_only = not named_exprs and all(isinstance(name, str) for name in exprs)
    if not names_only:  # pragma: no cover
        msg = (
            "`select`-ing not by name is not supported for Ibis backend.\n\n"
            "If you would like to see this kind of object better supported in "
            "Narwhals, please open a feature request "
            "at https://github.com/narwhals-dev/narwhals/issues."
        )
        raise NotImplementedError(msg)

    # Imported only after validation, so the error path does not require ibis.
    import ibis.selectors as ibis_selectors

    column_selector = ibis_selectors.cols(*exprs)
    selected = self._native_frame.select(column_selector)
    return self._from_native_frame(selected)

def __getattr__(self, attr: str) -> Any:
if attr == "schema":
return {
Expand All @@ -98,3 +116,6 @@ def __getattr__(self, attr: str) -> Any:
"at https://github.com/narwhals-dev/narwhals/issues."
)
raise NotImplementedError(msg)

def _from_native_frame(self: Self, df: Any) -> Self:
    """Wrap `df` in a new instance of this frame's class.

    The current `_dtypes` object is passed through to the new instance.
    """
    frame_cls = self.__class__
    return frame_cls(df, dtypes=self._dtypes)
46 changes: 46 additions & 0 deletions tests/frame/interchange_select_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

import duckdb
import polars as pl
import pytest

import narwhals.stable.v1 as nw

data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}


def test_interchange() -> None:
    """`select` on a frame built from a bare interchange object must raise."""
    interchange_obj = pl.DataFrame(data).__dataframe__()
    frame = nw.from_native(interchange_obj, eager_or_interchange_only=True)

    expected_msg = "Attribute select is not supported for metadata-only dataframes"
    with pytest.raises(NotImplementedError, match=expected_msg):
        frame.select("a", "z")


def test_interchange_ibis(
    tmpdir: pytest.TempdirFactory,
) -> None:  # pragma: no cover
    """Selecting by name on an Ibis-backed frame keeps only the named columns."""
    # Skips the test when ibis is not installed.
    ibis = pytest.importorskip("ibis")

    # Round-trip the sample data through a parquet file that ibis reads back.
    parquet_path = str(tmpdir / "file.parquet")  # type: ignore[operator]
    pl.DataFrame(data).write_parquet(parquet_path)

    table = ibis.read_parquet(parquet_path)
    frame = nw.from_native(table, eager_or_interchange_only=True)
    selected = frame.select("a", "z")

    assert selected.schema.names() == ["a", "z"]


def test_interchange_duckdb() -> None:
    """Selecting by name on a DuckDB-backed frame keeps only the named columns."""
    # `df_pl` looks unused to linters but is referenced by name inside the SQL
    # text below, so the local variable must keep exactly this name.
    df_pl = pl.DataFrame(data)  # noqa: F841
    relation = duckdb.sql("select * from df_pl")
    frame = nw.from_native(relation, eager_or_interchange_only=True)
    selected = frame.select("a", "z")

    assert selected.schema.names() == ["a", "z"]

0 comments on commit 0b333f9

Please sign in to comment.