narwhals-dev · lucas-nelson-uiuc · Jan 12, 2025 · Jan 12, 2025 · Jan 12, 2025 · Jan 12, 2025
diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
 
+import operator
+from functools import reduce
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
+from typing import Iterable
 from typing import Literal
 from typing import Sequence
 
@@ -11,6 +14,7 @@
 from narwhals._spark_like.utils import get_column_name
 from narwhals._spark_like.utils import maybe_evaluate
 from narwhals.typing import CompliantExpr
+from narwhals.typing import IntoExpr
 from narwhals.utils import Implementation
 from narwhals.utils import parse_version
 
@@ -289,6 +293,90 @@ def count(self) -> Self:
 
         return self._from_call(F.count, "count", returns_scalar=True)
 
+    def drop_nulls(self) -> Self:
+        def _drop_nulls(_input: Column) -> Column:
+            from pyspark.sql import functions as F  # noqa: N812
+
+            return F.explode(F.filter(F.array(_input), F.isnotnull))
+
+        return self._from_call(_drop_nulls, "drop_nulls", returns_scalar=True)
+
+    def fill_null(
+        self,
+        value: Any | None = None,
+        strategy: Literal["forward", "backward"] | None = None,
+        limit: int | None = None,
+    ) -> Self:
+        def _fill_null(
+            _input: Column,
+            value: Any | None = None,
+            strategy: Literal["forward", "backward"] | None = None,
+            limit: int | None = None,
+        ) -> Column:
+            from pyspark.sql import Window
+            from pyspark.sql import functions as F  # noqa: N812
+
+            if strategy is not None:
+                match strategy:
+                    case "forward":
+                        lower_limit = (
+                            Window.unboundedPreceding if limit is None else -limit
+                        )
+                        window_spec = Window.orderBy(
+                            F.monotonically_increasing_id()
+                        ).rowsBetween(lower_limit, 0)
+                        fill_value = F.last(_input, ignorenulls=True).over(window_spec)
+                    case "backward":
+                        upper_limit = (
+                            Window.unboundedFollowing if limit is None else limit
+                        )
+                        window_spec = Window.orderBy(
+                            F.monotonically_increasing_id()
+                        ).rowsBetween(0, upper_limit)
+                        fill_value = F.first(_input, ignorenulls=True).over(window_spec)
+            else:
+                fill_value = F.lit(value)
+
+            return F.ifnull(_input, fill_value)
+
+        return self._from_call(
+            _fill_null,
+            "fill_null",
+            value=value,
+            strategy=strategy,
+            limit=limit,
+            returns_scalar=True,
+        )
+
+    def filter(
+        self, *predicates: IntoExpr | Iterable[IntoExpr], **constraints: Any
+    ) -> Self:
+        def _filter(
+            _input: Column, predicates: Iterable[IntoExpr], constraints: Any
+        ) -> Column:
+            from pyspark.sql import functions as F  # noqa: N812
+
+            if constraints:
+                predicates = (
+                    *predicates,
+                    *[
+                        operator.eq(F.col(key), value)
+                        for key, value in constraints.items()
+                    ],
+                )
+            query = reduce(operator.and_, predicates)
+            return F.explode(
+                F.filter(F.array(_input), lambda _: query)
+            )  # TODO (unauthored): resolve PySparkValueError: [HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN] Function `<lambda>` should return Column, got SparkLikeExpr.
+
+        return self._from_call(
+            _filter,
+            "filter",
+            predicates=predicates,
+            constraints=constraints,
+            returns_scalar=self._returns_scalar,
+        )
+
     def max(self) -> Self:
         from pyspark.sql import functions as F  # noqa: N812
 

diff --git a/tests/expr_and_series/fill_null_test.py b/tests/expr_and_series/fill_null_test.py
@@ -12,9 +12,7 @@
 from tests.utils import assert_equal_data
 
 
-def test_fill_null(request: pytest.FixtureRequest, constructor: Constructor) -> None:
-    if "pyspark" in str(constructor):
-        request.applymarker(pytest.mark.xfail)
+def test_fill_null(constructor: Constructor) -> None:
     data = {
         "a": [0.0, None, 2, 3, 4],
         "b": [1.0, None, None, 5, 3],
@@ -52,7 +50,7 @@ def test_fill_null_exceptions(constructor: Constructor) -> None:
 def test_fill_null_strategies_with_limit_as_none(
     constructor: Constructor, request: pytest.FixtureRequest
 ) -> None:
-    if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
+    if "duckdb" in str(constructor):
         request.applymarker(pytest.mark.xfail)
     data_limits = {
         "a": [1, None, None, None, 5, 6, None, None, None, 10],
@@ -122,7 +120,7 @@ def test_fill_null_strategies_with_limit_as_none(
 def test_fill_null_limits(
     constructor: Constructor, request: pytest.FixtureRequest
 ) -> None:
-    if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
+    if "duckdb" in str(constructor):
         request.applymarker(pytest.mark.xfail)
     context: Any = (
         pytest.raises(NotImplementedError, match="The limit keyword is not supported")