Skip to content

Commit

Permalink
Merge branch 'main' into feat/1696-ml-patterns-tutorial
Browse files Browse the repository at this point in the history
  • Loading branch information
philip-ndikum authored Jan 2, 2025
2 parents 14ed394 + 44d449d commit 5f240d2
Show file tree
Hide file tree
Showing 32 changed files with 2,343 additions and 964 deletions.
8 changes: 5 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,11 @@ If you add code that should be tested, please add tests.
- To run unit tests and doctests at the same time, run `pytest tests narwhals --cov=narwhals --doctest-modules`
- To run tests multiprocessed, you may also want to use [pytest-xdist](https://github.com/pytest-dev/pytest-xdist) (optional)
- To choose which backends to run tests with, you can use the `--constructors` flag:
- to only run tests for pandas, Polars, and PyArrow, use `pytest --constructors=pandas,pyarrow,polars`
- to run tests for all CPU constructors, use `pytest --all-cpu-constructors`
- by default, tests run for pandas, pandas (PyArrow dtypes), PyArrow, and Polars.
- To only run tests for pandas, Polars, and PyArrow, use `pytest --constructors=pandas,pyarrow,polars`
- To run tests for all CPU constructors, use `pytest --all-cpu-constructors`
- By default, tests run for pandas, pandas (PyArrow dtypes), PyArrow, and Polars.
- To run tests using `cudf.pandas`, run `NARWHALS_DEFAULT_CONSTRUCTORS=pandas python -m cudf.pandas -m pytest`
- To run tests using `polars[gpu]`, run `NARWHALS_POLARS_GPU=1 pytest --constructors=polars[lazy]`
If you want to have fewer surprises when opening a PR, you can take advantage of [nox](https://nox.thea.codes/en/stable/index.html) to run the entire CI/CD test suite locally on your operating system.
Expand Down
4 changes: 2 additions & 2 deletions docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,5 @@ Let's learn about what you just did, and what Narwhals can do for you!

!!! info

These examples are using pandas, Polars and PyArrow, however Narwhals supports
other dataframe libraries (See [supported libraries](extending.md)).
These examples are using pandas, Polars, and PyArrow, however Narwhals
supports other dataframe libraries (See [supported libraries](extending.md)).
8 changes: 8 additions & 0 deletions narwhals/_spark_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,14 @@ def _min(_input: Column) -> Column:

return self._from_call(_min, "min", returns_scalar=True)

def sum(self) -> Self:
    """Return an expression that sums each input column.

    The aggregation is delegated to ``pyspark.sql.functions.sum`` and
    registered through ``self._from_call`` under the name ``"sum"`` with
    ``returns_scalar=True``.

    Returns:
        A new expression wrapping the sum aggregation.
    """

    def _sum(_input: Column) -> Column:
        # Imported inside the callable so pyspark is only required once the
        # expression is actually evaluated.
        import pyspark.sql.functions as F  # noqa: N812

        summed = F.sum(_input)
        return summed

    return self._from_call(_sum, "sum", returns_scalar=True)

def std(self: Self, ddof: int) -> Self:
from functools import partial

Expand Down
27 changes: 27 additions & 0 deletions narwhals/_spark_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,30 @@ def col(self, *column_names: str) -> SparkLikeExpr:
return SparkLikeExpr.from_column_names(
*column_names, backend_version=self._backend_version, version=self._version
)

def sum_horizontal(self, *exprs: IntoSparkLikeExpr) -> SparkLikeExpr:
    """Build an expression that adds the given expressions together row-wise.

    Every column produced by the parsed expressions is wrapped in
    ``coalesce(col, lit(0))`` before being added, so null entries count as
    zero. The single resulting column is aliased with the name that
    ``get_column_name`` reports for the first input column.

    Arguments:
        *exprs: Expressions (or objects convertible to expressions) to add.

    Returns:
        A ``SparkLikeExpr`` producing one column with the row-wise total.
    """
    parsed_exprs = parse_into_exprs(*exprs, namespace=self)

    def func(df: SparkLikeLazyFrame) -> list[Column]:
        import pyspark.sql.functions as F  # noqa: N812

        # Evaluate every expression against the frame and flatten the results.
        columns = []
        for parsed in parsed_exprs:
            columns.extend(parsed(df))

        output_name = get_column_name(df, columns[0])

        # Replace nulls with 0 lazily, then fold the columns with `+`.
        null_safe = (F.coalesce(column, F.lit(0)) for column in columns)
        total = reduce(operator.add, null_safe)
        return [total.alias(output_name)]

    depth = max(parsed._depth for parsed in parsed_exprs) + 1
    root_names = combine_root_names(parsed_exprs)
    output_names = reduce_output_names(parsed_exprs)

    return SparkLikeExpr(  # type: ignore[abstract]
        call=func,
        depth=depth,
        function_name="sum_horizontal",
        root_names=root_names,
        output_names=output_names,
        returns_scalar=False,
        backend_version=self._backend_version,
        version=self._version,
        kwargs={"exprs": exprs},
    )
43 changes: 11 additions & 32 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from narwhals.translate import to_native
from narwhals.utils import find_stacklevel
from narwhals.utils import flatten
from narwhals.utils import generate_repr
from narwhals.utils import is_sequence_but_not_str
from narwhals.utils import parse_version

Expand Down Expand Up @@ -414,18 +415,7 @@ def __array__(self, dtype: Any = None, copy: bool | None = None) -> np.ndarray:
return self._compliant_frame.__array__(dtype, copy=copy)

def __repr__(self) -> str: # pragma: no cover
header = " Narwhals DataFrame "
length = len(header)
return (
"┌"
+ "─" * length
+ "┐\n"
+ f"|{header}|\n"
+ "| Use `.to_native` to see native output |\n"
+ "└"
+ "─" * length
+ "┘"
)
return generate_repr("Narwhals DataFrame", self.to_native().__repr__())

def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
"""Export a DataFrame via the Arrow PyCapsule Interface.
Expand Down Expand Up @@ -3581,26 +3571,14 @@ def __init__(
raise AssertionError(msg)

def __repr__(self) -> str: # pragma: no cover
header = " Narwhals LazyFrame "
length = len(header)
return (
"┌"
+ "─" * length
+ "┐\n"
+ f"|{header}|\n"
+ "| Use `.to_native` to see native output |\n"
+ "└"
+ "─" * length
+ "┘"
)
return generate_repr("Narwhals LazyFrame", self.to_native().__repr__())

@property
def implementation(self) -> Implementation:
"""Return implementation of native frame.
This can be useful when you need to some special-casing for
some libraries for features outside of Narwhals' scope - for
example, when dealing with pandas' Period Dtype.
This can be useful when you need to use special-casing for features outside of
Narwhals' scope - for example, when dealing with pandas' Period Dtype.
Returns:
Implementation.
Expand Down Expand Up @@ -3640,11 +3618,12 @@ def collect(self) -> DataFrame[Any]:
... }
... )
>>> lf = nw.from_native(lf_pl)
>>> lf
┌───────────────────────────────────────┐
| Narwhals LazyFrame |
| Use `.to_native` to see native output |
└───────────────────────────────────────┘
>>> lf # doctest:+ELLIPSIS
┌─────────────────────────────┐
| Narwhals LazyFrame |
|-----------------------------|
|<LazyFrame at ...
└─────────────────────────────┘
>>> df = lf.group_by("a").agg(nw.all().sum()).collect()
>>> df.to_native().sort("a")
shape: (3, 3)
Expand Down
Loading

0 comments on commit 5f240d2

Please sign in to comment.