Merge pull request #27 from abstractqqq/complex

abstractqqq · Dec 15, 2023 · 16166d5 · 16166d5
2 parents 63d2816 + 4414c87
commit 16166d5
Show file tree

Hide file tree

Showing 11 changed files with 346 additions and 9 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 .ipynb_checkpoints
 tests/*.ipynb
+tests/test.ipynb
 
 /target
 

diff --git a/python/polars_ds/__init__.py b/python/polars_ds/__init__.py
@@ -1,7 +1,8 @@
 version = "0.1.4"
 
 from polars_ds.num_ext import NumExt  # noqa: E402
+from polars_ds.complex_ext import ComplexExt  # noqa: E402
 from polars_ds.str_ext import StrExt  # noqa: E402
 from polars_ds.stats_ext import StatsExt  # noqa: E402
 
-__all__ = ["NumExt", "StrExt", "StatsExt"]
+__all__ = ["NumExt", "StrExt", "StatsExt", "ComplexExt"]
diff --git a/python/polars_ds/complex_ext.py b/python/polars_ds/complex_ext.py
@@ -0,0 +1,189 @@
+"""
+Tools for dealing with complex numbers columns inside Polars dataframe.
+
+Complex number columns are represented as a column of size-2 lists. By default, an element will look like [re, im],
+which is in coordinate form. All operations (except pow, which turns it into polar form internally) assume the number
+is in coordinate form. There is a to_coord function provided for complex numbers in polar form [r, theta].
+"""
+
+import polars as pl
+from typing import Union
+import math
+
+
+@pl.api.register_expr_namespace("c")
+class ComplexExt:
+    def __init__(self, expr: pl.Expr):
+        self._expr: pl.Expr = expr
+
+    def re(self) -> pl.Expr:
+        """Returns the real part of the complex number."""
+        return self._expr.list.first()
+
+    def im(self) -> pl.Expr:
+        """Returns the imaginary part of the complex number."""
+        return self._expr.list.last()
+
+    def to_complex(self) -> pl.Expr:
+        """Turns a column of floats into a column of complex with im = 0."""
+        return pl.concat_list(self._expr, pl.lit(0.0, dtype=pl.Float64))
+
+    def with_imag(self, other: pl.Expr) -> pl.Expr:
+        """
+        Treats self as the real part, and other as the imaginary part and combines
+        them into a complex column. An alias for pl.concat_list(self._expr, other)
+        """
+        return pl.concat_list(self._expr, other)
+
+    def modulus(self) -> pl.Expr:
+        """Returns the modulus of the complex number."""
+        return self._expr.list.eval(pl.element().dot(pl.element()).sqrt()).list.first()
+
+    def squared_modulus(self) -> pl.Expr:
+        """Returns the squared modulus of the complex number."""
+        return self._expr.list.eval(pl.element().dot(pl.element())).list.first()
+
+    def theta(self, degree: bool = False) -> pl.Expr:
+        """Returns the polar angle (in radians by default) of the complex number."""
+        x = self._expr.list.first()
+        y = self._expr.list.last()
+        if degree:
+            return (
+                pl.when((x > 0) | (y != 0))
+                .then(pl.arctan2d(y, x))
+                .when((x < 0) & (y == 0))
+                .then(pl.lit(180.0, dtype=pl.Float64))
+                .otherwise(pl.lit(math.nan, dtype=pl.Float64))
+            )
+        else:
+            return (
+                pl.when((x > 0) | (y != 0))
+                .then(pl.arctan2(y, x))
+                .when((x < 0) & (y == 0))
+                .then(pl.lit(math.pi, dtype=pl.Float64))
+                .otherwise(pl.lit(math.nan, dtype=pl.Float64))
+            )
+
+    def to_polar(self) -> pl.Expr:
+        """Turns a complex number in coordinate form into polar form."""
+        return pl.concat_list(self.modulus(), self.theta())
+
+    def to_coord(self) -> pl.Expr:
+        """Turns a complex number in polar form into coordinate form."""
+        r = self._expr.list.first()
+        theta = self._expr.list.last()
+        return pl.concat_list(r * theta.cos(), r * theta.sin())
+
+    def conj(self) -> pl.Expr:
+        """Returns complex conjugate."""
+        return pl.concat_list(self._expr.list.first(), -self._expr.list.last())
+
+    def add(self, other: Union[float, complex, pl.Expr]) -> pl.Expr:
+        """
+        Add either a single real, complex, or another col of complex to self. If other is
+        an expression, it must be another col of complex numbers.
+        """
+        if isinstance(other, float):
+            return self._expr.list.eval(pl.element() + pl.Series([other, 0]))
+        if isinstance(other, complex):
+            return self._expr.list.eval(pl.element() + pl.Series([other.real, other.imag]))
+        else:
+            return pl.concat_list(
+                self._expr.list.first() + other.list.first(),
+                self._expr.list.last() + other.list.last(),
+            )
+
+    def sub(self, other: Union[float, complex, pl.Expr]) -> pl.Expr:
+        """
+        Subtract either a single real, complex, or another col of complex from self. If other is
+        an expression, it must be another col of complex numbers.
+        """
+        if isinstance(other, float):
+            return self._expr.list.eval(pl.element() - pl.Series([other, 0]))
+        if isinstance(other, complex):
+            return self._expr.list.eval(pl.element() - pl.Series([other.real, other.imag]))
+        else:
+            return pl.concat_list(
+                self._expr.list.first() - other.list.first(),
+                self._expr.list.last() - other.list.last(),
+            )
+
+    def mul(self, other: Union[float, complex, pl.Expr]) -> pl.Expr:
+        """
+        Multiply self by either a single real, complex, or another col of complex. If other is
+        an expression, it must be another col of complex numbers.
+        """
+        if isinstance(other, float):
+            return self._expr.list.eval(pl.element() * pl.lit(other))
+        if isinstance(other, complex):
+            x = self._expr.list.first()
+            y = self._expr.list.last()
+            new_real = x * other.real - y * other.imag
+            new_imag = x * other.imag + y * other.real
+            return pl.concat_list(new_real, new_imag)
+        else:
+            x = self._expr.list.first()
+            y = self._expr.list.last()
+            x2 = other.list.first()
+            y2 = other.list.last()
+            new_real = x * x2 - y * y2
+            new_imag = x * y2 + y * x2
+            return pl.concat_list(new_real, new_imag)
+
+    def inv(self) -> pl.Expr:
+        """Returns 1/z for a complex number z."""
+        x = self._expr.list.first()
+        y = self._expr.list.last()
+        denom = x.pow(2) + y.pow(2)
+        return pl.concat_list(x / denom, -y / denom)
+
+    def div(self, other: Union[float, complex, pl.Expr]) -> pl.Expr:
+        """
+        Divide self by either a single real, complex, or another col of complex. If other is
+        an expression, it must be another col of complex numbers.
+        """
+        if isinstance(other, float):
+            return self._expr.list.eval(pl.element() / pl.lit(other))
+        if isinstance(other, complex):
+            x = self._expr.list.first()
+            y = self._expr.list.last()
+            inverse = 1 / other
+            new_real = x * inverse.real - y * inverse.imag
+            new_imag = x * inverse.imag + y * inverse.real
+            return pl.concat_list(new_real, new_imag)
+        else:
+            x = self._expr.list.first()
+            y = self._expr.list.last()
+            x2 = other.list.first()
+            y2 = other.list.last()
+            denom = x2.pow(2) + y2.pow(2)
+            x_inv = x2 / denom
+            y_inv = -y2 / denom
+            new_real = x * x_inv - y * y_inv
+            new_imag = x * y_inv + y * x_inv
+            return pl.concat_list(new_real, new_imag)
+
+    def mul_by_i(self) -> pl.Expr:
+        """Multiplies self by i."""
+        x = self._expr.list.first()
+        y = self._expr.list.last()
+        return pl.concat_list(-y, x)
+
+    def pow(self, x: float) -> pl.Expr:
+        """Raises a complex number to the x power."""
+        if x == 0.0:
+            return pl.concat_list(
+                pl.when(self.modulus() == 0.0).then(math.nan).otherwise(1.0),
+                pl.lit(0.0, dtype=pl.Float64),
+            )
+        elif x == 1.0:
+            return self._expr
+        elif x == 2.0:
+            return self.mul(self._expr)
+        elif x == -1.0:
+            return self.inv()
+        else:
+            polar = self.to_polar()
+            r = polar.list.first()
+            theta = polar.list.last()
+            return pl.concat_list(r.pow(x) * (x * theta).cos(), r.pow(x) * (x * theta).sin())
diff --git a/python/polars_ds/num_ext.py b/python/polars_ds/num_ext.py
@@ -1,16 +1,38 @@
+"""
+Tools for dealing with well-known numerical operations and other metrics inside Polars DataFrame. 
+
+It currently contains some time series stuff such as detrend, rfft, and time series metrics like SMAPE.
+"""
+
 import polars as pl
 from typing import Union, Optional
+from .type_alias import DetrendMethod
 from polars.utils.udfs import _get_shared_lib_location
+# import math
 # from polars.type_aliases import IntoExpr
 
 lib = _get_shared_lib_location(__file__)
 
+# TwoPi = 2.0 * math.pi
+
 
 @pl.api.register_expr_namespace("num_ext")
 class NumExt:
     def __init__(self, expr: pl.Expr):
         self._expr: pl.Expr = expr
 
+    # def _hamming_window(self, a:float = 0.5) -> pl.Expr:
+    #     """
+    #     Generates a hamming window the same length as self. By default a = 0.5, which is the Hann window.
+    #     """
+    #     N = self._expr.count()
+    #     return (
+    #         pl.lit(a, dtype=pl.Float64)
+    #         - (pl.lit(1.0 - a, dtype = pl.Float64)) * (
+    #             (pl.lit(TwoPi) * pl.int_range(0, N, dtype=pl.Float64, eager=False) / N).cos()
+    #         )
+    #     )
+
     def binarize(self, cond: Optional[pl.Expr]) -> pl.Expr:
         """
         Binarize the column by a boolean condition.
@@ -543,7 +565,7 @@ def list_jaccard(self, other: pl.Expr) -> pl.Expr:
 
     def cond_entropy(self, other: pl.Expr) -> pl.Expr:
         """
-        Computes the conditional entropy of self(y) given other. H(y|other).
+        Computes the conditional entropy of self(y) given other, i.e. H(y|other).
 
         Parameters
         ----------
@@ -588,6 +610,27 @@ def lstsq(self, *others: pl.Expr, add_bias: bool = False) -> pl.Expr:
             returns_scalar=True,
         )
 
+    def detrend(self, method: DetrendMethod = "linear") -> pl.Expr:
+        """
+        Detrends self using either linear/mean method.
+
+        Parameters
+        ----------
+        method
+            Either `linear` or `mean`
+        """
+
+        if method == "linear":
+            N = self._expr.count()
+            x = pl.int_range(0, N, dtype=pl.Float64, eager=False)
+            coeff = pl.cov(self._expr, x) / x.var()
+            const = self._expr.mean() - coeff * (N - 1) / 2
+            return self._expr - x * coeff - const
+        elif method == "mean":
+            return self._expr - self._expr.mean()
+        else:
+            raise ValueError(f"Unknown detrend method: {method}")
+
     # Add a k step argument?
     # def fft(self, forward: bool = True) -> pl.Expr:
     #     """

diff --git a/python/polars_ds/stats_ext.py b/python/polars_ds/stats_ext.py
@@ -1,3 +1,7 @@
+"""
+Tools for dealing with well-known statistical tests and random sampling inside Polars DataFrame. 
+"""
+
 import polars as pl
 from .type_alias import Alternative
 from typing import Optional

diff --git a/python/polars_ds/str_ext.py b/python/polars_ds/str_ext.py
@@ -1,3 +1,7 @@
+"""
+Tools for dealing with string similarity, common string operations like tokenize, extract numbers, etc., inside Polars DataFrame.
+"""
+
 import polars as pl
 from typing import Union, Optional
 from polars.utils.udfs import _get_shared_lib_location

diff --git a/python/polars_ds/type_alias.py b/python/polars_ds/type_alias.py
@@ -6,5 +6,6 @@
 else:  # 3.9
     from typing_extensions import TypeAlias
 
+DetrendMethod: TypeAlias = Literal["linear", "mean"]
 AhoCorasickMatchKind: TypeAlias = Literal["standard", "left_most_first", "left_most_longest"]
 Alternative: TypeAlias = Literal["two-sided", "less", "greater"]
diff --git a/src/num_ext/complex.rs b/src/num_ext/complex.rs
@@ -0,0 +1,10 @@
+use polars::prelude::*;
+// use pyo3_polars::derive::polars_expr;
+// use num::Complex;
+
+pub fn complex_output(_: &[Field]) -> PolarsResult<Field> {
+    Ok(Field::new(
+        "complex",
+        DataType::List(Box::new(DataType::Float64)),
+    ))
+}
diff --git a/src/num_ext/fft.rs b/src/num_ext/fft.rs
@@ -1,3 +1,4 @@
+use super::complex::complex_output;
 /// Performs forward FFT.
 /// Since data in dataframe are always real numbers, only realfft
 /// is implemented and inverse fft is not implemented and even if it
@@ -8,13 +9,6 @@ use polars::prelude::*;
 use pyo3_polars::derive::polars_expr;
 use realfft::RealFftPlanner;
 
-fn complex_output(_: &[Field]) -> PolarsResult<Field> {
-    Ok(Field::new(
-        "complex",
-        DataType::List(Box::new(DataType::Float64)),
-    ))
-}
-
 #[polars_expr(output_type_func=complex_output)]
 fn pl_rfft(inputs: &[Series]) -> PolarsResult<Series> {
     // Take a step argument

diff --git a/src/num_ext/mod.rs b/src/num_ext/mod.rs
@@ -1,3 +1,4 @@
+mod complex;
 mod cond_entropy;
 mod fft;
 mod gcd_lcm;