diff --git a/.gitignore b/.gitignore index 96421a26..0dd794ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -tests/*.ipynb +test.ipynb +.ipynb_checkpoints /target @@ -73,7 +74,6 @@ docs/_build/ # Pyenv .python-version - # Polars Extension .so .dll \ No newline at end of file diff --git a/src/num_ext/expressions.rs b/src/num_ext/expressions.rs index dc4210f1..8fccb02e 100644 --- a/src/num_ext/expressions.rs +++ b/src/num_ext/expressions.rs @@ -1,4 +1,4 @@ -use faer::{prelude::*, MatRef, Side}; +use faer::{prelude::*, MatRef}; use faer::{IntoFaer, IntoNdarray}; // use faer::polars::{polars_to_faer_f64, Frame}; use ndarray::{Array1, Array2}; diff --git a/tests/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/tests/.ipynb_checkpoints/Untitled-checkpoint.ipynb deleted file mode 100644 index 9136eb54..00000000 --- a/tests/.ipynb_checkpoints/Untitled-checkpoint.ipynb +++ /dev/null @@ -1,217 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "529f4422-5c3a-4bd6-abe0-a15edfc62abb", - "metadata": {}, - "outputs": [], - "source": [ - "from polars_ds.extensions import StrExt, NumExt\n", - "import polars as pl\n", - "import numpy as np " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "65b702bd-fbc3-447f-a870-7d7faa222ceb", - "metadata": {}, - "outputs": [], - "source": [ - "size = 2048\n", - "df = pl.DataFrame({\n", - " \"a\": np.sin(np.array(range(size))),\n", - " \"b\": np.sin(np.array(range(size))),\n", - " \"c\": np.sin(np.array(range(size))),\n", - " \"d\": np.sin(np.array(range(size))),\n", - " \"e\": np.sin(np.array(range(size))),\n", - "})\n", - "npa = df[\"a\"].to_numpy()\n", - "npb = df[\"b\"].to_numpy()\n", - "npc = df[\"c\"].to_numpy()\n", - "npd = df[\"d\"].to_numpy()\n", - "npe = df[\"e\"].to_numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5def88d8-7c33-4eb9-8ced-8a2892a6e754", - "metadata": {}, - "outputs": [], - "source": [ - "%%timeit\n", - "np.fft.rfft(npa).real" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "08e70184-72ab-4815-814c-e25f0b3c1d64", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "df.select(\n", - " pl.col(\"a\").num_ext.fft().struct.field(\"re\"),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "87868f55-db7b-4b58-bfa7-56e6fbd1130d", - "metadata": {}, - "outputs": [], - "source": [ - "%%timeit\n", - "df.select(\n", - " pl.col(\"a\").num_ext.fft(),\n", - " pl.col(\"b\").num_ext.fft(),\n", - " pl.col(\"c\").num_ext.fft(),\n", - " pl.col(\"d\").num_ext.fft(),\n", - " pl.col(\"e\").num_ext.fft()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "78dae23d-f7aa-41f2-856f-18da70170828", - "metadata": {}, - "outputs": [], - "source": [ - "%%timeit \n", - "_ = np.fft.rfft(npa)\n", - "_ = np.fft.rfft(npb)\n", - "_ = np.fft.rfft(npc)\n", - "_ = np.fft.rfft(npd)\n", - "_ = np.fft.rfft(npe)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d6379fb-b2f5-45e4-bb19-d66a21d4c86e", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f82ebcaa-25df-4ddd-b166-55f06e979593", - "metadata": {}, - "outputs": [], - "source": [ - "df = pl.DataFrame({\n", - " \"dummy\": [\"a\"] * 50_000 + [\"b\"] * 50_000,\n", - " \"a\": range(100_000),\n", - " \"b\": range(-100_000, 0),\n", - " \"y\": pl.Series(range(100_000, 200_000)) + 0.5\n", - "})\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "459f56fa-8821-47ba-b1d3-53680625a79d", - "metadata": {}, - "outputs": [], - "source": [ - "df = pl.DataFrame({\n", - " \"y\":[1,2,3,4,5],\n", - " \"a\": [2,3,4,5,6],\n", - " \"b\"\n", - "})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "84fe99db-1729-46a3-b063-795ec7ea039d", - "metadata": {}, - "outputs": [], - "source": [ - "%%timeit\n", - "df.select(\n", - " pl.col(\"y\").num_ext.lstsq(pl.col(\"a\"), add_bias=True)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5821d4d0-fe4f-4864-9d56-0a2c0ef03334", - "metadata": {}, - "outputs": [], - "source": [ - "df.group_by(\"dummy\").agg(\n", - " pl.col(\"y\").num_ext.lstsq(pl.col(\"a\"), pl.col(\"b\"), add_bias = True)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3f9ad447-c7bb-4830-b996-882a382f0854", - "metadata": {}, - "outputs": [], - "source": [ - "df.select(pl.col(\"y\").num_ext.lstsq(pl.col(\"a\"), pl.col(\"b\")))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff9b6b21-532a-4655-a8a4-e5396c760a67", - "metadata": {}, - "outputs": [], - "source": [ - "df.select(\n", - " pl.col(\"y\").num_ext.lstsq(pl.col(\"a\"), pl.col(\"b\")).over(\"dummy\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9acfcf37-85f9-44d6-96b4-897cd3fca4e6", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "803ba306-791f-4a1e-a5b6-a6d6b9b55447", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/Untitled.ipynb b/tests/Untitled.ipynb deleted file mode 100644 index 5f5ac24a..00000000 --- a/tests/Untitled.ipynb +++ /dev/null @@ -1,275 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "529f4422-5c3a-4bd6-abe0-a15edfc62abb", - "metadata": {}, - "outputs": [], - "source": [ - "from polars_ds.extensions import StrExt, NumExt\n", - "import polars as pl\n", - "import numpy as np " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "65b702bd-fbc3-447f-a870-7d7faa222ceb", - "metadata": {}, - "outputs": [], - "source": [ - "size = 2048\n", - "df = pl.DataFrame({\n", - " \"a\": np.sin(np.array(range(size))),\n", - " \"b\": np.sin(np.array(range(size))),\n", - " \"c\": np.sin(np.array(range(size))),\n", - " \"d\": np.sin(np.array(range(size))),\n", - " \"e\": np.sin(np.array(range(size))),\n", - "})\n", - "npa = df[\"a\"].to_numpy()\n", - "npb = df[\"b\"].to_numpy()\n", - "npc = df[\"c\"].to_numpy()\n", - "npd = df[\"d\"].to_numpy()\n", - "npe = df[\"e\"].to_numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5def88d8-7c33-4eb9-8ced-8a2892a6e754", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12.4 µs ± 71.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "np.fft.rfft(npa).real" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "08e70184-72ab-4815-814c-e25f0b3c1d64", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "114 µs ± 4.7 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "df.select(\n", - " pl.col(\"a\").num_ext.fft().struct.field(\"re\"),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "87868f55-db7b-4b58-bfa7-56e6fbd1130d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "216 µs ± 6.39 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "df.select(\n", - " pl.col(\"a\").num_ext.fft(),\n", - " pl.col(\"b\").num_ext.fft(),\n", - " pl.col(\"c\").num_ext.fft(),\n", - " pl.col(\"d\").num_ext.fft(),\n", - " pl.col(\"e\").num_ext.fft()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "78dae23d-f7aa-41f2-856f-18da70170828", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "51.1 µs ± 312 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" - ] - } - ], - "source": [ - "%%timeit \n", - "_ = np.fft.rfft(npa)\n", - "_ = np.fft.rfft(npb)\n", - "_ = np.fft.rfft(npc)\n", - "_ = np.fft.rfft(npd)\n", - "_ = np.fft.rfft(npe)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d6379fb-b2f5-45e4-bb19-d66a21d4c86e", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f82ebcaa-25df-4ddd-b166-55f06e979593", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
dummy | a | b | y |
---|---|---|---|
str | i64 | i64 | f64 |
"a" | 0 | -100000 | 100000.5 |
"a" | 1 | -99999 | 100001.5 |
"a" | 2 | -99998 | 100002.5 |
"a" | 3 | -99997 | 100003.5 |
"a" | 4 | -99996 | 100004.5 |