Skip to content

Commit

Permalink
Merge pull request #27 from abstractqqq/complex
Browse files Browse the repository at this point in the history
  • Loading branch information
abstractqqq authored Dec 15, 2023
2 parents 63d2816 + 4414c87 commit 16166d5
Show file tree
Hide file tree
Showing 11 changed files with 346 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.ipynb_checkpoints
tests/*.ipynb
tests/test.ipynb

/target

Expand Down
3 changes: 2 additions & 1 deletion python/polars_ds/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
version = "0.1.4"

from polars_ds.num_ext import NumExt # noqa: E402
from polars_ds.complex_ext import ComplexExt # noqa: E402
from polars_ds.str_ext import StrExt # noqa: E402
from polars_ds.stats_ext import StatsExt # noqa: E402

__all__ = ["NumExt", "StrExt", "StatsExt"]
__all__ = ["NumExt", "StrExt", "StatsExt", "ComplexExt"]
189 changes: 189 additions & 0 deletions python/polars_ds/complex_ext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
"""
Tools for dealing with complex number columns inside a Polars dataframe.
Complex number columns are represented as columns of size-2 lists. By default, an element looks like [re, im],
which is coordinate form. All operations (except pow, which converts to polar form internally) assume the number
is in coordinate form. A to_coord function is provided for complex numbers in polar form [r, theta].
"""

import polars as pl
from typing import Union
import math


@pl.api.register_expr_namespace("c")
class ComplexExt:
    """
    Expression namespace `c` for complex numbers stored as size-2 lists.

    Every method reads the first list element as the real part and the last list
    element as the imaginary part ([re, im]). The only exception is `to_coord`,
    which reads its input as [r, theta].
    """

    def __init__(self, expr: pl.Expr):
        self._expr: pl.Expr = expr

    def re(self) -> pl.Expr:
        """Returns the real part of the complex number."""
        return self._expr.list.first()

    def im(self) -> pl.Expr:
        """Returns the imaginary part of the complex number."""
        return self._expr.list.last()

    def to_complex(self) -> pl.Expr:
        """Turns a column of floats into a column of complex with im = 0."""
        return pl.concat_list(self._expr, pl.lit(0.0, dtype=pl.Float64))

    def with_imag(self, other: pl.Expr) -> pl.Expr:
        """
        Treats self as the real part, and other as the imaginary part and combines
        them into a complex column. An alias for pl.concat_list(self._expr, other)
        """
        return pl.concat_list(self._expr, other)

    def modulus(self) -> pl.Expr:
        """Returns the modulus of the complex number."""
        # The dot product of the list with itself is re^2 + im^2.
        return self._expr.list.eval(pl.element().dot(pl.element()).sqrt()).list.first()

    def squared_modulus(self) -> pl.Expr:
        """Returns the squared modulus of the complex number."""
        return self._expr.list.eval(pl.element().dot(pl.element())).list.first()

    def theta(self, degree: bool = False) -> pl.Expr:
        """
        Returns the polar angle of the complex number.

        Parameters
        ----------
        degree
            If True, the angle is returned in degrees. Otherwise it is in radians.
        """
        x = self._expr.list.first()
        y = self._expr.list.last()
        # Only the angle function and the value used on the negative real axis
        # depend on the requested unit; the branching logic is shared.
        angle, half_turn = (pl.arctan2d, 180.0) if degree else (pl.arctan2, math.pi)
        return (
            pl.when((x > 0) | (y != 0))
            .then(angle(y, x))
            .when((x < 0) & (y == 0))
            .then(pl.lit(half_turn, dtype=pl.Float64))
            # Every remaining row (e.g. the origin) gets NaN.
            .otherwise(pl.lit(math.nan, dtype=pl.Float64))
        )

    def to_polar(self) -> pl.Expr:
        """Turns a complex number in coordinate form into polar form [r, theta]."""
        return pl.concat_list(self.modulus(), self.theta())

    def to_coord(self) -> pl.Expr:
        """Turns a complex number in polar form [r, theta] into coordinate form."""
        r = self._expr.list.first()
        theta = self._expr.list.last()
        return pl.concat_list(r * theta.cos(), r * theta.sin())

    def conj(self) -> pl.Expr:
        """Returns complex conjugate."""
        return pl.concat_list(self._expr.list.first(), -self._expr.list.last())

    def add(self, other: Union[float, complex, pl.Expr]) -> pl.Expr:
        """
        Adds a single real number, a single complex number, or another column of
        complex numbers to self.

        Parameters
        ----------
        other
            A Python int/float/complex, or an expression that evaluates to a column
            of complex numbers.
        """
        x = self._expr.list.first()
        y = self._expr.list.last()
        # ints are accepted wherever floats are; a real scalar is a complex with im = 0.
        if isinstance(other, (int, float, complex)):
            z = complex(other)
            return pl.concat_list(x + z.real, y + z.imag)
        return pl.concat_list(x + other.list.first(), y + other.list.last())

    def sub(self, other: Union[float, complex, pl.Expr]) -> pl.Expr:
        """
        Subtracts a single real number, a single complex number, or another column of
        complex numbers from self.

        Parameters
        ----------
        other
            A Python int/float/complex, or an expression that evaluates to a column
            of complex numbers.
        """
        x = self._expr.list.first()
        y = self._expr.list.last()
        # ints are accepted wherever floats are; a real scalar is a complex with im = 0.
        if isinstance(other, (int, float, complex)):
            z = complex(other)
            return pl.concat_list(x - z.real, y - z.imag)
        return pl.concat_list(x - other.list.first(), y - other.list.last())

    def mul(self, other: Union[float, complex, pl.Expr]) -> pl.Expr:
        """
        Multiplies self by a single real number, a single complex number, or another
        column of complex numbers.

        Parameters
        ----------
        other
            A Python int/float/complex, or an expression that evaluates to a column
            of complex numbers.
        """
        if isinstance(other, (int, float)):
            # A real factor scales both parts.
            return self._expr.list.eval(pl.element() * pl.lit(float(other)))

        x = self._expr.list.first()
        y = self._expr.list.last()
        if isinstance(other, complex):
            x2, y2 = other.real, other.imag
        else:
            x2, y2 = other.list.first(), other.list.last()
        # (x + iy)(x2 + iy2) = (x*x2 - y*y2) + i(x*y2 + y*x2)
        return pl.concat_list(x * x2 - y * y2, x * y2 + y * x2)

    def inv(self) -> pl.Expr:
        """Returns 1/z for a complex number z."""
        x = self._expr.list.first()
        y = self._expr.list.last()
        denom = x.pow(2) + y.pow(2)
        return pl.concat_list(x / denom, -y / denom)

    def div(self, other: Union[float, complex, pl.Expr]) -> pl.Expr:
        """
        Divides self by a single real number, a single complex number, or another
        column of complex numbers.

        Parameters
        ----------
        other
            A Python int/float/complex, or an expression that evaluates to a column
            of complex numbers. A Python complex equal to 0 raises ZeroDivisionError
            because its inverse is computed in Python.
        """
        if isinstance(other, (int, float)):
            # A real divisor scales both parts.
            return self._expr.list.eval(pl.element() / pl.lit(float(other)))

        x = self._expr.list.first()
        y = self._expr.list.last()
        # Division is multiplication by the inverse of `other`.
        if isinstance(other, complex):
            inverse = 1 / other
            x_inv, y_inv = inverse.real, inverse.imag
        else:
            x2 = other.list.first()
            y2 = other.list.last()
            denom = x2.pow(2) + y2.pow(2)
            x_inv = x2 / denom
            y_inv = -y2 / denom
        return pl.concat_list(x * x_inv - y * y_inv, x * y_inv + y * x_inv)

    def mul_by_i(self) -> pl.Expr:
        """Multiplies self by i."""
        x = self._expr.list.first()
        y = self._expr.list.last()
        return pl.concat_list(-y, x)

    def pow(self, x: float) -> pl.Expr:
        """
        Raises a complex number to the x power.

        Parameters
        ----------
        x
            A real exponent. The values 0, 1, 2 and -1 are handled without going
            through polar form; every other exponent is computed in polar form.
        """
        if x == 0.0:
            # z^0 is 1, except that a zero modulus yields NaN.
            return pl.concat_list(
                pl.when(self.modulus() == 0.0).then(math.nan).otherwise(1.0),
                pl.lit(0.0, dtype=pl.Float64),
            )
        elif x == 1.0:
            return self._expr
        elif x == 2.0:
            return self.mul(self._expr)
        elif x == -1.0:
            return self.inv()
        else:
            # (r, theta) -> (r^x, x * theta), then back to coordinate form.
            polar = self.to_polar()
            r = polar.list.first()
            theta = polar.list.last()
            return pl.concat_list(r.pow(x) * (x * theta).cos(), r.pow(x) * (x * theta).sin())
45 changes: 44 additions & 1 deletion python/polars_ds/num_ext.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,38 @@
"""
Tools for dealing with well-known numerical operations and other metrics inside Polars DataFrame.
It currently contains some time series stuff such as detrend, rfft, and time series metrics like SMAPE.
"""

import polars as pl
from typing import Union, Optional
from .type_alias import DetrendMethod
from polars.utils.udfs import _get_shared_lib_location
# import math
# from polars.type_aliases import IntoExpr

lib = _get_shared_lib_location(__file__)

# TwoPi = 2.0 * math.pi


@pl.api.register_expr_namespace("num_ext")
class NumExt:
def __init__(self, expr: pl.Expr):
    """Stores the expression that this namespace's methods operate on."""
    self._expr: pl.Expr = expr

# def _hamming_window(self, a:float = 0.5) -> pl.Expr:
# """
# Generates a hamming window the same length as self. By default a = 0.5, which is the Hann window.
# """
# N = self._expr.count()
# return (
# pl.lit(a, dtype=pl.Float64)
# - (pl.lit(1.0 - a, dtype = pl.Float64)) * (
# (pl.lit(TwoPi) * pl.int_range(0, N, dtype=pl.Float64, eager=False) / N).cos()
# )
# )

def binarize(self, cond: Optional[pl.Expr]) -> pl.Expr:
"""
Binarize the column by a boolean condition.
Expand Down Expand Up @@ -543,7 +565,7 @@ def list_jaccard(self, other: pl.Expr) -> pl.Expr:

def cond_entropy(self, other: pl.Expr) -> pl.Expr:
"""
Computes the conditional entropy of self(y) given other. H(y|other).
Computes the conditional entropy of self(y) given other, aka. H(y|other).
Parameters
----------
Expand Down Expand Up @@ -588,6 +610,27 @@ def lstsq(self, *others: pl.Expr, add_bias: bool = False) -> pl.Expr:
returns_scalar=True,
)

def detrend(self, method: DetrendMethod = "linear") -> pl.Expr:
    """
    Removes a trend from self and returns what is left.

    Parameters
    ----------
    method
        Either `linear` or `mean`. `mean` subtracts the column mean. `linear`
        subtracts a straight line fitted against the positions 0, 1, ..., N - 1.

    Raises
    ------
    ValueError
        If method is neither `linear` nor `mean`.
    """
    if method == "mean":
        return self._expr - self._expr.mean()
    if method != "linear":
        raise ValueError(f"Unknown detrend method: {method}")

    n_obs = self._expr.count()
    idx = pl.int_range(0, n_obs, dtype=pl.Float64, eager=False)
    # Slope of the fitted line: cov(y, idx) / var(idx).
    slope = pl.cov(self._expr, idx) / idx.var()
    # Intercept: mean(y) - slope * mean(idx), where mean(idx) = (N - 1) / 2.
    intercept = self._expr.mean() - slope * (n_obs - 1) / 2
    return self._expr - idx * slope - intercept

# Add a k step argument?
# def fft(self, forward: bool = True) -> pl.Expr:
# """
Expand Down
4 changes: 4 additions & 0 deletions python/polars_ds/stats_ext.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Tools for dealing with well-known statistical tests and random sampling inside Polars DataFrame.
"""

import polars as pl
from .type_alias import Alternative
from typing import Optional
Expand Down
4 changes: 4 additions & 0 deletions python/polars_ds/str_ext.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Tools for dealing with string similarity, common string operations like tokenize, extract numbers, etc., inside Polars DataFrame.
"""

import polars as pl
from typing import Union, Optional
from polars.utils.udfs import _get_shared_lib_location
Expand Down
1 change: 1 addition & 0 deletions python/polars_ds/type_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
else: # 3.9
from typing_extensions import TypeAlias

DetrendMethod: TypeAlias = Literal["linear", "mean"]
AhoCorasickMatchKind: TypeAlias = Literal["standard", "left_most_first", "left_most_longest"]
Alternative: TypeAlias = Literal["two-sided", "less", "greater"]
10 changes: 10 additions & 0 deletions src/num_ext/complex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use polars::prelude::*;
// use pyo3_polars::derive::polars_expr;
// use num::Complex;

/// Output-type function for expressions that return complex numbers.
///
/// The input fields are ignored; the result is always a field named `"complex"`
/// whose dtype is a list of `Float64`.
pub fn complex_output(_: &[Field]) -> PolarsResult<Field> {
    let dtype = DataType::List(Box::new(DataType::Float64));
    Ok(Field::new("complex", dtype))
}
8 changes: 1 addition & 7 deletions src/num_ext/fft.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use super::complex::complex_output;
/// Performs forward FFT.
/// Since data in dataframe are always real numbers, only realfft
/// is implemented and inverse fft is not implemented and even if it
Expand All @@ -8,13 +9,6 @@ use polars::prelude::*;
use pyo3_polars::derive::polars_expr;
use realfft::RealFftPlanner;

/// Describes the output of a complex-valued expression: a field named
/// `"complex"` holding a list of `Float64`, regardless of the input fields.
fn complex_output(_: &[Field]) -> PolarsResult<Field> {
    let float_list = DataType::List(Box::new(DataType::Float64));
    let field = Field::new("complex", float_list);
    Ok(field)
}

#[polars_expr(output_type_func=complex_output)]
fn pl_rfft(inputs: &[Series]) -> PolarsResult<Series> {
// Take a step argument
Expand Down
1 change: 1 addition & 0 deletions src/num_ext/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod complex;
mod cond_entropy;
mod fft;
mod gcd_lcm;
Expand Down
Loading

0 comments on commit 16166d5

Please sign in to comment.