diff --git a/Cargo.lock b/Cargo.lock index 64c9af89..a3c6d6ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "polars_ds" -version = "0.2.0" +version = "0.2.1" dependencies = [ "aho-corasick", "faer", diff --git a/Cargo.toml b/Cargo.toml index 9887ed81..758bf355 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "polars_ds" -version = "0.2.0" +version = "0.2.1" edition = "2021" [lib] diff --git a/pyproject.toml b/pyproject.toml index 347bec21..8d0cf70a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "polars_ds" requires-python = ">=3.9" -version = "0.2.0" +version = "0.2.1" license = {file = "LICENSE.txt"} classifiers = [ diff --git a/python/polars_ds/__init__.py b/python/polars_ds/__init__.py index 0feb3815..b30b0db6 100644 --- a/python/polars_ds/__init__.py +++ b/python/polars_ds/__init__.py @@ -7,7 +7,7 @@ from polars_ds.str2 import StrExt # noqa: E402 from polars_ds.stats import StatsExt # noqa: E402 -version = "0.2.0" +version = "0.2.1" __all__ = ["NumExt", "StrExt", "StatsExt", "ComplexExt"] diff --git a/src/num_ext/cond_entropy.rs b/src/num_ext/cond_entropy.rs index 4be6702b..b93cd0fd 100644 --- a/src/num_ext/cond_entropy.rs +++ b/src/num_ext/cond_entropy.rs @@ -3,12 +3,11 @@ use pyo3_polars::derive::polars_expr; #[polars_expr(output_type=Float64)] fn pl_conditional_entropy(inputs: &[Series]) -> PolarsResult { - let x = inputs[0].name(); - let y = inputs[1].name(); - let out_name = format!("H({x}|{y})"); - let out_name = out_name.as_str(); + let x = "x"; + let y = "y"; + let out_name = "H(x|y)"; - let df = DataFrame::new(inputs.to_vec())?; + let df = df!(x => inputs[0].clone(), y => inputs[1].clone())?; let mut out = df .lazy() .group_by([col(x), col(y)]) diff --git a/src/num_ext/entrophies.rs b/src/num_ext/entrophies.rs index 9fb9795c..f67f6cee 100644 --- a/src/num_ext/entrophies.rs +++ b/src/num_ext/entrophies.rs @@ -11,19 +11,27 @@ fn pl_approximate_entropy(inputs: &[Series], kwargs: KdtreeKwargs) -> PolarsResu // inputs[0] is radius, the rest are the shifted columns // Set up radius. r is a scalar and set up at Python side. let radius = inputs[0].f64()?; + let name = inputs[1].name(); if radius.get(0).is_none() { - return Ok(Series::from_vec("", vec![f64::NAN])); + return Ok(Series::from_vec(name, vec![f64::NAN])); } let r = radius.get(0).unwrap(); // Set up params - let data = DataFrame::new(inputs[1..].to_vec())?.agg_chunks(); + let dim = inputs[1..].len(); + let mut vs:Vec = Vec::with_capacity(dim); + for (i, s) in inputs[1..].into_iter().enumerate() { + let news = s + .rechunk() + .with_name(&i.to_string()); + vs.push(news) + } + let data = DataFrame::new(vs)?; let n1 = data.height(); // This is equal to original length - m + 1 let data = data.to_ndarray::(IndexOrder::C)?; // Here, dim equals to run_length + 1, or m + 1 // + 1 because I am intentionally generating one more, so that we do to_ndarray only once. - let dim = inputs[1..].len(); if (n1 < dim) || (r <= 0.) || (!r.is_finite()) { - return Ok(Series::from_vec("", vec![f64::NAN])); + return Ok(Series::from_vec(name, vec![f64::NAN])); } let parallel = kwargs.parallel; let leaf_size = kwargs.leaf_size; @@ -48,7 +56,7 @@ fn pl_approximate_entropy(inputs: &[Series], kwargs: KdtreeKwargs) -> PolarsResu / n2 as f64; // Output - Ok(Series::from_vec("", vec![(phi_m1 - phi_m).abs()])) + Ok(Series::from_vec(name, vec![(phi_m1 - phi_m).abs()])) } #[polars_expr(output_type=Float64)] @@ -56,19 +64,27 @@ fn pl_sample_entropy(inputs: &[Series], kwargs: KdtreeKwargs) -> PolarsResult = Vec::with_capacity(dim); + for (i, s) in inputs[1..].into_iter().enumerate() { + let news = s + .rechunk() + .with_name(&i.to_string()); + vs.push(news) + } + let data = DataFrame::new(vs)?; let n1 = data.height(); // This is equal to original length - m + 1 let data = data.to_ndarray::(IndexOrder::C)?; // Here, dim equals to run_length + 1, or m + 1 // + 1 because I am intentionally generating one more, so that we do to_ndarray only once. - let dim = inputs[1..].len(); if (n1 < dim) || (r <= 0.) || (!r.is_finite()) { - return Ok(Series::from_vec("", vec![f64::NAN])); + return Ok(Series::from_vec(name, vec![f64::NAN])); } let parallel = kwargs.parallel; let leaf_size = kwargs.leaf_size; @@ -85,5 +101,5 @@ fn pl_sample_entropy(inputs: &[Series], kwargs: KdtreeKwargs) -> PolarsResult PolarsResult { fn pl_knn_ptwise(inputs: &[Series], kwargs: KdtreeKwargs) -> PolarsResult { // Set up params let id = inputs[0].u64()?; - let data = DataFrame::new(inputs[1..].to_vec())?.agg_chunks(); - + let dim = inputs[1..].len(); if dim == 0 { return Err(PolarsError::ComputeError( "KNN: No column to decide distance from.".into(), )); } - + let mut vs:Vec = Vec::with_capacity(dim); + for (i, s) in inputs[1..].into_iter().enumerate() { + let news = s + .rechunk() + .with_name(&i.to_string()); + vs.push(news) + } + let data = DataFrame::new(vs)?; let k = kwargs.k; let leaf_size = kwargs.leaf_size; let parallel = kwargs.parallel; @@ -141,7 +147,14 @@ fn pl_knn_pt(inputs: &[Series], kwargs: KdtreeKwargs) -> PolarsResult { let p = p.as_slice().unwrap(); // Rechunked, so safe to unwrap // Set up params - let data = DataFrame::new(inputs[1..].to_vec())?.agg_chunks(); + let mut vs:Vec = Vec::with_capacity(dim); + for (i, s) in inputs[1..].into_iter().enumerate() { + let news = s + .rechunk() + .with_name(&i.to_string()); + vs.push(news) + } + let data = DataFrame::new(vs)?; let height = data.height(); let dim = inputs[1..].len(); let k = kwargs.k; @@ -213,19 +226,26 @@ fn pl_nb_cnt(inputs: &[Series], kwargs: KdtreeKwargs) -> PolarsResult { let radius = inputs[0].f64()?; // Set up params - let data = DataFrame::new(inputs[1..].to_vec())?.agg_chunks(); let dim = inputs[1..].len(); if dim == 0 { return Err(PolarsError::ComputeError( "KNN: No column to decide distance from.".into(), )); } + + let mut vs:Vec = Vec::with_capacity(dim); + for (i, s) in inputs[1..].into_iter().enumerate() { + let news = s + .rechunk() + .with_name(&i.to_string()); + vs.push(news) + } + let data = DataFrame::new(vs)?; + let height = data.height(); let parallel = kwargs.parallel; let leaf_size = kwargs.leaf_size; let dist_func = which_distance(kwargs.metric.as_str(), dim)?; - // Need to use C order because C order is row-contiguous - let height = data.height(); let data = data.to_ndarray::(IndexOrder::C)?; // Building the tree diff --git a/tests/test.ipynb b/tests/test.ipynb index 1ab1de73..60b7b417 100644 --- a/tests/test.ipynb +++ b/tests/test.ipynb @@ -18,7 +18,11 @@ "id": "f0aef69b", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df = pl.DataFrame({\n", + " \n", + "})" + ] } ], "metadata": {