diff --git a/.gitignore b/.gitignore index 0dd794ee..e34cbaa6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -test.ipynb .ipynb_checkpoints +tests/*.ipynb /target diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..54640df3 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.1.5 + hooks: + # Run the linter. + - id: ruff + types_or: [ python, pyi] + args: [ --fix ] + # Run the formatter. + - id: ruff-format + types_or: [ python, pyi] \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 90de36ef..4a0dcee7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1463,7 +1463,7 @@ dependencies = [ [[package]] name = "polars_ds" -version = "0.1.0" +version = "0.1.1" dependencies = [ "faer", "hashbrown", diff --git a/Cargo.toml b/Cargo.toml index 3dbfd378..e8de3168 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "polars_ds" -version = "0.1.0" +version = "0.1.1" edition = "2021" [lib] @@ -12,13 +12,13 @@ crate-type = ["cdylib"] [dependencies] pyo3 = {version = "0.20", features = ["extension-module"]} pyo3-polars = {version = "0.8", features = ["derive"]} -polars = {version = "0.34", features = ["performant", "nightly", "chunked_ids", "lazy", "dtype-struct", "ndarray", "log"]} +polars = {version = "0.34", features = ["performant", "chunked_ids", "lazy", "dtype-struct", "ndarray", "log", "nightly"]} num = "0.4.1" faer = {version = "0.14.1", features = ["ndarray", "nightly"]} ndarray = "0.15.6" -hashbrown = "0.14.2" +hashbrown = {version = "0.14.2", features=["nightly"]} rustfft = "6.1.0" - + [target.'cfg(target_os = "linux")'.dependencies] jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] } @@ -27,4 +27,4 @@ mimalloc = { version = "0.1", default-features = false } [profile.release] codegen-units = 1 -lto = "fat" +# lto = "fat" diff --git a/Makefile b/Makefile index 
830231d1..9fb892d7 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,8 @@ install: venv dev-release: venv unset CONDA_PREFIX && \ source .venv/bin/activate && maturin develop --release -m Cargo.toml - .venv/bin/pip install -e . + +# .venv/bin/pip install -e . pre-commit: venv cargo fmt --all --manifest-path Cargo.toml && cargo clippy --all-features --manifest-path Cargo.toml diff --git a/README.md b/README.md index 457cc5d4..7734e82f 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ df.group_by("dummy").agg( ) ``` +To avoid `Chunked array is not contiguous` error, try to rechunk your dataframe. + The package right now contains two extensions: ## Numeric Extension diff --git a/examples/basics.ipynb b/examples/basics.ipynb new file mode 100644 index 00000000..29076739 --- /dev/null +++ b/examples/basics.ipynb @@ -0,0 +1,708 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "529f4422-5c3a-4bd6-abe0-a15edfc62abb", + "metadata": {}, + "outputs": [], + "source": [ + "from polars_ds import StrExt, NumExt\n", + "import polars as pl\n", + "import numpy as np " + ] + }, + { + "cell_type": "markdown", + "id": "3aef5c69-fff3-4779-9b58-f939d725f0b0", + "metadata": {}, + "source": [ + "# Num Extensions" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "430fec01-5d0b-422f-b099-c86037512b6d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 7)
fdummyabx1x2y
f64strf64f64i64i64i64
0.0"a"0.0251290.4806310100000-100000
0.841471"a"0.1229040.6025841100001-99999
0.909297"a"0.0006960.843852100002-99998
0.14112"a"0.19880.84193100003-99997
-0.756802"a"0.6981760.4645934100004-99996
" + ], + "text/plain": [ + "shape: (5, 7)\n", + "┌───────────┬───────┬──────────┬──────────┬─────┬────────┬─────────┐\n", + "│ f ┆ dummy ┆ a ┆ b ┆ x1 ┆ x2 ┆ y │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ f64 ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ i64 ┆ i64 │\n", + "╞═══════════╪═══════╪══════════╪══════════╪═════╪════════╪═════════╡\n", + "│ 0.0 ┆ a ┆ 0.025129 ┆ 0.480631 ┆ 0 ┆ 100000 ┆ -100000 │\n", + "│ 0.841471 ┆ a ┆ 0.122904 ┆ 0.602584 ┆ 1 ┆ 100001 ┆ -99999 │\n", + "│ 0.909297 ┆ a ┆ 0.000696 ┆ 0.84385 ┆ 2 ┆ 100002 ┆ -99998 │\n", + "│ 0.14112 ┆ a ┆ 0.1988 ┆ 0.8419 ┆ 3 ┆ 100003 ┆ -99997 │\n", + "│ -0.756802 ┆ a ┆ 0.698176 ┆ 0.464593 ┆ 4 ┆ 100004 ┆ -99996 │\n", + "└───────────┴───────┴──────────┴──────────┴─────┴────────┴─────────┘" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "size = 100_000\n", + "df = pl.DataFrame({\n", + " \"f\": np.sin(list(range(size)))\n", + " , \"dummy\": [\"a\"] * (size // 2) + [\"b\"] * (size // 2)\n", + " , \"a\": np.random.random(size = size)\n", + " , \"b\": np.random.random(size = size)\n", + " , \"x1\" : range(size)\n", + " , \"x2\" : range(size, size + size)\n", + " , \"y\": range(-size, 0)\n", + "})\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b6f98453-34cd-4afc-b35d-db58fa60a69a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 1)
x1
f64
0.0
" + ], + "text/plain": [ + "shape: (1, 1)\n", + "┌─────┐\n", + "│ x1 │\n", + "│ --- │\n", + "│ f64 │\n", + "╞═════╡\n", + "│ 0.0 │\n", + "└─────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Column-wise Jaccard Similarity. Result should be 0 as they are distinct\n", + "df.select(\n", + " pl.col(\"x1\").num_ext.jaccard(pl.col(\"x2\"))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "416d5346-e75b-4769-a953-e898d6a4d84c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 2)
reim
f64f64
1.8120280.0
1.812028-0.000002
1.812028-0.000005
1.812028-0.000007
1.812028-0.00001
" + ], + "text/plain": [ + "shape: (5, 2)\n", + "┌──────────┬───────────┐\n", + "│ re ┆ im │\n", + "│ --- ┆ --- │\n", + "│ f64 ┆ f64 │\n", + "╞══════════╪═══════════╡\n", + "│ 1.812028 ┆ 0.0 │\n", + "│ 1.812028 ┆ -0.000002 │\n", + "│ 1.812028 ┆ -0.000005 │\n", + "│ 1.812028 ┆ -0.000007 │\n", + "│ 1.812028 ┆ -0.00001 │\n", + "└──────────┴───────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# FFT\n", + "df.select(\n", + " pl.col(\"f\").num_ext.fft()\n", + ").unnest(\"f\").head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ed47b643-6bcc-43f6-9a25-82168c33e7fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 1)
y
list[f64]
[2.0, -1.0]
" + ], + "text/plain": [ + "shape: (1, 1)\n", + "┌─────────────┐\n", + "│ y │\n", + "│ --- │\n", + "│ list[f64] │\n", + "╞═════════════╡\n", + "│ [2.0, -1.0] │\n", + "└─────────────┘" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Least Square (Linear Regression)\n", + "df.select(\n", + " pl.col(\"y\").num_ext.lstsq(pl.col(\"x1\"), pl.col(\"x2\"), add_bias=False)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0e9fb061-340d-423d-9107-772387006ff2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 2)
dummylist_float
strlist[f64]
"a"[2.0, -1.0]
"b"[2.0, -1.0]
" + ], + "text/plain": [ + "shape: (2, 2)\n", + "┌───────┬─────────────┐\n", + "│ dummy ┆ list_float │\n", + "│ --- ┆ --- │\n", + "│ str ┆ list[f64] │\n", + "╞═══════╪═════════════╡\n", + "│ a ┆ [2.0, -1.0] │\n", + "│ b ┆ [2.0, -1.0] │\n", + "└───────┴─────────────┘" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.group_by(\"dummy\").agg(\n", + " pl.col(\"y\").num_ext.lstsq(pl.col(\"x1\"), pl.col(\"x2\"), add_bias=False)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d8fda8ca-57e7-4e02-a3f0-283ecce66a59", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 1)
y
f64
-0.0
" + ], + "text/plain": [ + "shape: (1, 1)\n", + "┌──────┐\n", + "│ y │\n", + "│ --- │\n", + "│ f64 │\n", + "╞══════╡\n", + "│ -0.0 │\n", + "└──────┘" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Conditional Entropy, should be 0 because x1 is an ID\n", + "df.select(\n", + " pl.col(\"y\").num_ext.cond_entropy(pl.col(\"x1\"))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cf55f007-a7c2-4a78-a93b-4d83bfefe95c", + "metadata": {}, + "outputs": [], + "source": [ + "# t statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b78e8775-c50e-4d1f-a482-a5f76a358a9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 1)
a
f64
-0.242792
" + ], + "text/plain": [ + "shape: (1, 1)\n", + "┌───────────┐\n", + "│ a │\n", + "│ --- │\n", + "│ f64 │\n", + "╞═══════════╡\n", + "│ -0.242792 │\n", + "└───────────┘" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select(\n", + " pl.col(\"a\").num_ext.t_2samp(pl.col(\"b\"))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "bf70afa1-28f9-4227-a58f-aa49ed722e4a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (1, 1)
a
list[f64]
[-0.242792, 199997.660059]
" + ], + "text/plain": [ + "shape: (1, 1)\n", + "┌────────────────────────────┐\n", + "│ a │\n", + "│ --- │\n", + "│ list[f64] │\n", + "╞════════════════════════════╡\n", + "│ [-0.242792, 199997.660059] │\n", + "└────────────────────────────┘" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select(\n", + " pl.col(\"a\").num_ext.welch_t(pl.col(\"b\"), return_df = True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f7c22e5e-b724-4ed9-827f-1a4f41870b8c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 2)
dummyt
strf64
"b"-146.557106
"a"-146.902724
" + ], + "text/plain": [ + "shape: (2, 2)\n", + "┌───────┬─────────────┐\n", + "│ dummy ┆ t │\n", + "│ --- ┆ --- │\n", + "│ str ┆ f64 │\n", + "╞═══════╪═════════════╡\n", + "│ b ┆ -146.557106 │\n", + "│ a ┆ -146.902724 │\n", + "└───────┴─────────────┘" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.group_by(\"dummy\").agg(\n", + " pl.col(\"f\").num_ext.t_2samp(pl.col(\"b\")).alias(\"t\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85d0d094-3c4c-4230-a589-1027c5690162", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "a8d7c6e3-0f1d-45f0-9fdb-cdb303b98556", + "metadata": {}, + "source": [ + "# Str Extension" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "54ad36f9-264e-4a49-bf36-936639440edf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 2)
senword
strstr
"Hello, world! …"words"
"Hello, world! …"word"
"Hello, world! …"words"
"Hello, world! …"word"
"Hello, world! …"words"
" + ], + "text/plain": [ + "shape: (5, 2)\n", + "┌───────────────────────────────────┬───────┐\n", + "│ sen ┆ word │\n", + "│ --- ┆ --- │\n", + "│ str ┆ str │\n", + "╞═══════════════════════════════════╪═══════╡\n", + "│ Hello, world! I'm going to churc… ┆ words │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ Hello, world! I'm going to churc… ┆ words │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ Hello, world! I'm going to churc… ┆ words │\n", + "└───────────────────────────────────┴───────┘" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "size = 100_000\n", + "df = pl.DataFrame({\n", + " \"sen\":[\"Hello, world! I'm going to church.\"] * size,\n", + " \"word\":[\"words\", \"word\"] * (size //2)\n", + "})\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ee123a7e-7f9b-4f48-a5d5-6354799201ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5, 1)
sen
str
"hello"
"world"
"going"
"to"
"church"
" + ], + "text/plain": [ + "shape: (5, 1)\n", + "┌────────┐\n", + "│ sen │\n", + "│ --- │\n", + "│ str │\n", + "╞════════╡\n", + "│ hello │\n", + "│ world │\n", + "│ going │\n", + "│ to │\n", + "│ church │\n", + "└────────┘" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Tokenize\n", + "df.select(\n", + " pl.col(\"sen\").str.to_lowercase().str_ext.tokenize().explode().unique()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f33017e3-17df-498b-93d9-1d656a344388", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (4, 1)
sen
str
"world"
"hello"
"church"
"go"
" + ], + "text/plain": [ + "shape: (4, 1)\n", + "┌────────┐\n", + "│ sen │\n", + "│ --- │\n", + "│ str │\n", + "╞════════╡\n", + "│ world │\n", + "│ hello │\n", + "│ church │\n", + "│ go │\n", + "└────────┘" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select(\n", + " pl.col(\"sen\").str.to_lowercase().str_ext.tokenize(stem=True).explode().unique()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "69237c02-5f9f-4e92-b68d-6ac43aad1a79", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (100_000, 1)
word
u32
2
1
2
1
2
1
2
1
2
1
2
1
2
1
2
1
2
1
2
1
2
1
2
1
" + ], + "text/plain": [ + "shape: (100_000, 1)\n", + "┌──────┐\n", + "│ word │\n", + "│ --- │\n", + "│ u32 │\n", + "╞══════╡\n", + "│ 2 │\n", + "│ 1 │\n", + "│ 2 │\n", + "│ 1 │\n", + "│ … │\n", + "│ 2 │\n", + "│ 1 │\n", + "│ 2 │\n", + "│ 1 │\n", + "└──────┘" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select(\n", + " pl.col(\"word\").str_ext.levenshtein_dist(\"world\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2dad7633-67fa-47f3-b86a-9f4cd097a650", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (50_000, 2)
senword
strstr
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
"Hello, world! …"word"
" + ], + "text/plain": [ + "shape: (50_000, 2)\n", + "┌───────────────────────────────────┬──────┐\n", + "│ sen ┆ word │\n", + "│ --- ┆ --- │\n", + "│ str ┆ str │\n", + "╞═══════════════════════════════════╪══════╡\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ … ┆ … │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "│ Hello, world! I'm going to churc… ┆ word │\n", + "└───────────────────────────────────┴──────┘" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.filter(\n", + " pl.col(\"word\").str_ext.levenshtein_dist(\"world\") == 1\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4f45d3d-d3b9-4fde-9ed5-b3d01d0fa1ba", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8073ff19-21da-449d-87c5-2791a574bc81", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02a88a93-8805-4a97-a94e-196fba7090c5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 539aedeb..cfc5809d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ classifiers = [ "Programming Language :: Python :: 
Implementation :: PyPy", "License :: OSI Approved :: MIT License", ] -version = "0.1.0" +version = "0.1.1" authors = [ {name = "Tianren Qin", email = "tq9695@gmail.com"}, {name = "Nelson Griffiths", email = "nelsongriffiths123@gmail.com"} @@ -33,6 +33,7 @@ module-name = "polars_ds._polars_ds" [project.optional-dependencies] dev = [ "pytest >= 7.4.1", + "pre-commit" ] [tool.ruff] diff --git a/python/polars_ds/__init__.py b/python/polars_ds/__init__.py index 71a71e46..51246c53 100644 --- a/python/polars_ds/__init__.py +++ b/python/polars_ds/__init__.py @@ -1,2 +1,8 @@ +version = "0.1.1" -version = "0.1.0" \ No newline at end of file +from polars_ds.extensions import NumExt, StrExt # noqa: E402 + +__all__ = [ + "NumExt", + "StrExt" +] \ No newline at end of file diff --git a/python/polars_ds/extensions.py b/python/polars_ds/extensions.py index 391ccd55..78b89552 100644 --- a/python/polars_ds/extensions.py +++ b/python/polars_ds/extensions.py @@ -1,6 +1,7 @@ import polars as pl from typing import Union from polars.utils.udfs import _get_shared_lib_location +# from polars.type_aliases import IntoExpr lib = _get_shared_lib_location(__file__) @@ -114,59 +115,73 @@ def lcm(self, other: Union[int, pl.Expr]) -> pl.Expr: is_elementwise=True, ) - def hubor_loss(self, other: pl.Expr, delta: float) -> pl.Expr: + def hubor_loss(self, pred: pl.Expr, delta: float) -> pl.Expr: """ - Computes huber loss between this and the other expression + Computes huber loss between this and the other expression. This assumes + this expression is actual, and the input is predicted, although the order + does not matter in this case. 
Parameters ---------- - other - Either an int or a Polars expression + pred + A Polars expression representing predictions """ - temp = (self._expr - other).abs() + temp = (self._expr - pred).abs() return ( - pl.when(temp <= delta) - .then(0.5 * temp.pow(2)) - .otherwise(delta * (temp - 0.5 * delta)) - / self._expr.count() + pl.when(temp <= delta).then(0.5 * temp.pow(2)).otherwise(delta * (temp - 0.5 * delta)) / self._expr.count() ) - def l1_loss(self, other: pl.Expr, normalize: bool = True) -> pl.Expr: + def l1_loss(self, pred: pl.Expr, normalize: bool = True) -> pl.Expr: """ - Computes L1 loss (normalized L1 distance) between this and the other expression. This - is the norm without 1/p power. + Computes L1 loss (absolute difference) between this and the other expression. Parameters ---------- - other - Either an int or a Polars expression + pred + A Polars expression representing predictions normalize If true, divide the result by length of the series """ - temp = (self._expr - other).abs().sum() + temp = (self._expr - pred).abs().sum() if normalize: return temp / self._expr.count() return temp - def l2_loss(self, other: pl.Expr, normalize: bool = True) -> pl.Expr: + def l2_loss(self, pred: pl.Expr, normalize: bool = True) -> pl.Expr: """ Computes L2 loss (normalized L2 distance) between this and the other expression. This is the norm without 1/p power. - Parameters ---------- - other - Either an int or a Polars expression + pred + A Polars expression representing predictions normalize If true, divide the result by length of the series """ - temp = self._expr - other + temp = self._expr - pred temp = temp.dot(temp) if normalize: return temp / self._expr.count() return temp + def msle(self, pred: pl.Expr, normalize: bool = True) -> pl.Expr: + """ + Computes the mean square log error. 
+ + Parameters + ---------- + pred + A Polars expression representing predictions + normalize + If true, divide the result by length of the series + """ + diff = self._expr.log1p() - pred.log1p() + out = diff.dot(diff) + if normalize: + return out / self._expr.count() + return out + # def lp_loss(self, other: pl.Expr, p: float, normalize: bool = True) -> pl.Expr: # """ # Computes LP loss (normalized LP distance) between this and the other expression. This @@ -189,30 +204,30 @@ def l2_loss(self, other: pl.Expr, normalize: bool = True) -> pl.Expr: # return (temp / self._expr.count()) # return temp - def chebyshev_loss(self, other: pl.Expr, normalize: bool = True) -> pl.Expr: + def chebyshev_loss(self, pred: pl.Expr, normalize: bool = True) -> pl.Expr: """ Alias for l_inf_loss. """ - return self.l_inf_dist(other, normalize) + return self.l_inf_dist(pred, normalize) - def l_inf_loss(self, other: pl.Expr, normalize: bool = True) -> pl.Expr: + def l_inf_loss(self, pred: pl.Expr, normalize: bool = True) -> pl.Expr: """ Computes L^infinity loss between this and the other expression Parameters - ---------- - other - Either an int or a Polars expression + ---------- + pred + A Polars expression representing predictions normalize If true, divide the result by length of the series """ - temp = self._expr - other + temp = self._expr - pred out = pl.max_horizontal(temp.min().abs(), temp.max().abs()) if normalize: return out / self._expr.count() return out - def mape(self, other: pl.Expr, weighted: bool = False) -> pl.Expr: + def mape(self, pred: pl.Expr, weighted: bool = False) -> pl.Expr: """ Computes mean absolute percentage error between self and other. Self is actual. 
If weighted, it will compute the weighted version as defined here: @@ -221,17 +236,17 @@ def mape(self, other: pl.Expr, weighted: bool = False) -> pl.Expr: Parameters ---------- - other - Either an int or a Polars expression + pred + A Polars expression representing predictions weighted If true, computes wMAPE in the wikipedia article """ if weighted: - return (self._expr - other).abs().sum() / self._expr.abs().sum() + return (self._expr - pred).abs().sum() / self._expr.abs().sum() else: - return (1 - other / self._expr).abs().mean() + return (1 - pred / self._expr).abs().mean() - def smape(self, other: pl.Expr) -> pl.Expr: + def smape(self, pred: pl.Expr) -> pl.Expr: """ Computes symmetric mean absolute percentage error between self and other. Self is actual. The value is always between 0 and 1. This is the third version in the wikipedia without @@ -241,30 +256,146 @@ def smape(self, other: pl.Expr) -> pl.Expr: Parameters ---------- - other - Either an int or a Polars expression + pred + A Polars expression representing predictions """ - numerator = (self._expr - other).abs() - denominator = 1.0 / (self._expr.abs() + other.abs()) + numerator = (self._expr - pred).abs() + denominator = 1.0 / (self._expr.abs() + pred.abs()) return (1.0 / self._expr.count()) * numerator.dot(denominator) - def bce(self, actual: pl.Expr, normalize:bool=True) -> pl.Expr: + def bce(self, pred: pl.Expr, normalize: bool = True) -> pl.Expr: """ - Treats self as the prediction. and computes Binary Cross Entropy loss. + Computes Binary Cross Entropy loss. Parameters ---------- - actual - The actual binary lable. Note: if this column is not binary, then the result - will be nonsense. + pred + The predicted probability. normalize Whether to divide by N. 
""" - out = actual.dot(self._expr.log()) + (1 - actual).dot((1 - self._expr).log()) + out = pred.dot(self._expr.log()) + (1 - pred).dot((1 - self._expr).log()) if normalize: return -(out / self._expr.count()) return -out + def r2(self, pred: pl.Expr) -> pl.Expr: + """ + Returns the coefficient of determineation for a regression model. + + Parameters + ---------- + pred + A Polars expression representing predictions + """ + diff = self._expr - pred + ss_res = diff.dot(diff) + diff2 = self._expr - self._expr.mean() + ss_tot = diff2.dot(diff2) + return 1.0 - ss_res / ss_tot + + def adjusted_r2(self, pred: pl.Expr, p: int) -> pl.Expr: + """ + Returns the adjusted r2 for a regression model. + + Parameters + ---------- + pred + A Polars expression representing predictions + p + The total number of explanatory variables in the model + """ + diff = self._expr - pred + ss_res = diff.dot(diff) + diff2 = self._expr - self._expr.mean() + ss_tot = diff2.dot(diff2) + df_res = self._expr.count() - p + df_tot = self._expr.count() - 1 + return 1.0 - (ss_res / df_res) / (ss_tot / df_tot) + + def powi(self, n: Union[int, pl.Expr]) -> pl.Expr: + """ + Computes positive integer power using the fast exponentiation algorithm. This is the + fastest when n is an integer input (Faster than Polars's builtin when n >= 16). When n + is an expression, it would depend on values in the expression (Still researching...) + + Parameters + ---------- + n + A single positive int or an expression representing a column of type i32. If type is + not i32, an error will occur. + """ + + if isinstance(n, int): + n_ = pl.lit(n, pl.Int32) + else: + n_ = n + + return self._expr.register_plugin( + lib=lib, symbol="pl_fast_exp", args=[n_], is_elementwise=True, returns_scalar=False + ) + + def t_2samp(self, other: pl.Expr) -> pl.Expr: + """ + Computes the t statistics for an Independent two-sample t-test. It is highly recommended + that nulls be imputed before calling this. 
+ + Parameters + ---------- + other + Either an int or a Polars expression + """ + numerator = self._expr.mean() - other.mean() + denom = ((self._expr.var() + other.var()) / self._expr.count()).sqrt() + return numerator / denom + + def welch_t(self, other: pl.Expr, return_df: bool = True) -> pl.Expr: + """ + Computes the statistics for Welch's t-test. Welch's t-test is often used when + the two series do not have the same length. Two series in a dataframe will always + have the same length. Here, only non-null values are counted. + + Parameters + ---------- + other + Either an int or a Polars expression + return_df + Whether to return the degree of freedom or not. + """ + e1 = self._expr.drop_nulls() + e2 = other.drop_nulls() + numerator = e1.mean() - e2.mean() + s1: pl.Expr = e1.var() / e1.count() + s2: pl.Expr = e2.var() / e2.count() + denom = (s1 + s2).sqrt() + if return_df: + df_num = (s1 + s2).pow(2) + df_denom = s1.pow(2) / (e1.count() - 1) + s2.pow(2) / (e2.count() - 1) + return pl.concat_list(numerator / denom, df_num / df_denom) + else: + return numerator / denom + + def jaccard(self, other: pl.Expr, include_null: bool = False) -> pl.Expr: + """ + Computes jaccard similarity between this column and the other. This will hash entire + columns and compares the two hashsets. Note: only integer/str columns can be compared. + Input expressions must represent columns of the same dtype. + + Parameters + ---------- + other + Either an int or a Polars expression + include_null + Whether to include null as a distinct element. + """ + return self._expr.register_plugin( + lib=lib, + symbol="pl_jaccard", + args=[other, pl.lit(include_null, dtype=pl.Boolean)], + is_elementwise=False, + returns_scalar=True, + ) + def cond_entropy(self, other: pl.Expr) -> pl.Expr: """ Computes the conditional entropy of self(y) given other. H(y|other). 
@@ -276,23 +407,19 @@ def cond_entropy(self, other: pl.Expr) -> pl.Expr: """ return self._expr.register_plugin( - lib=lib, - symbol="pl_conditional_entropy", - args=[other], - is_elementwise=False, - returns_scalar=True + lib=lib, symbol="pl_conditional_entropy", args=[other], is_elementwise=False, returns_scalar=True ) - def lstsq(self, *others: pl.Expr, add_bias:bool=False) -> pl.Expr: + def lstsq(self, *others: pl.Expr, add_bias: bool = False) -> pl.Expr: """ - Computes least squares solution to a linear matrix equation. If columns are + Computes least squares solution to the equation Ax = y. If columns are not linearly independent, some numerical issue may occur. E.g you may see - unrealistic coefficient in the output. This is a `silent` numerical issue during the - computation. + unrealistic coefficient in the output. It is possible to have `silent` numerical + issue during computation. + + All positional arguments should be expressions representing predictive variables. This + does not support composite expressions like pl.col(["a", "b"]), pl.all(), etc. - All positional arguments should be expressions representing individual columns. This does - not support composite expressions like pl.col(["a", "b"]), pl.all(), etc. - If add_bias is true, it will be the last coefficient in the output and output will have length |other| + 1 @@ -309,13 +436,13 @@ def lstsq(self, *others: pl.Expr, add_bias:bool=False) -> pl.Expr: symbol="pl_lstsq", args=[pl.lit(add_bias, dtype=pl.Boolean)] + list(others), is_elementwise=False, - returns_scalar=True + returns_scalar=True, ) - def fft(self, forward:bool=True) -> pl.Expr: + def fft(self, forward: bool = True) -> pl.Expr: """ Computes the DST transform of input series using FFT Algorithm. A series of equal length will - be returned, with elements being the real and complex part of the transformed values. + be returned, with elements being the real and complex part of the transformed values. 
Parameters ---------- @@ -329,17 +456,13 @@ def fft(self, forward:bool=True) -> pl.Expr: is_elementwise=True, ) + @pl.api.register_expr_namespace("str_ext") class StrExt: def __init__(self, expr: pl.Expr): self._expr: pl.Expr = expr - def str_jaccard( - self - , other: Union[str, pl.Expr] - , substr_size: int = 2 - , parallel: bool = False - ) -> pl.Expr: + def str_jaccard(self, other: Union[str, pl.Expr], substr_size: int = 2, parallel: bool = False) -> pl.Expr: """ Treats substrings of size `substr_size` as a set. And computes the jaccard similarity between this word and the other. @@ -369,11 +492,7 @@ def str_jaccard( is_elementwise=True, ) - def levenshtein_dist( - self - , other: Union[str, pl.Expr] - , parallel: bool = False - ) -> pl.Expr: + def levenshtein_dist(self, other: Union[str, pl.Expr], parallel: bool = False) -> pl.Expr: """ Computes the levenshtein distance between this each value in the column with the str other. @@ -399,11 +518,7 @@ def levenshtein_dist( is_elementwise=True, ) - def hamming_dist( - self - , other: Union[str, pl.Expr] - , parallel: bool = False - ) -> pl.Expr: + def hamming_dist(self, other: Union[str, pl.Expr], parallel: bool = False) -> pl.Expr: """ Computes the hamming distance between two strings. If they do not have the same length, null will be returned. @@ -450,17 +565,40 @@ def tokenize(self, pattern: str = r"(?u)\b\w\w+\b", stem: bool = False) -> pl.Ex .register_plugin( lib=lib, symbol="pl_snowball_stem", + args=[pl.lit(True, dtype=pl.Boolean), pl.lit(False, dtype=pl.Boolean)], is_elementwise=True, - ) + ) # True to no stop word, False to Parallel .drop_nulls() - ).list.unique() + ) return out - def snowball( - self - , no_stopwords:bool=True - , parallel:bool=False - ) -> pl.Expr: + def freq_removal(self, lower: float = 0.05, upper: float = 0.95, parallel: bool = True) -> pl.Expr: + """ + Removes from each documents words that are too frequent (in the entire dataset). 
This assumes + that the input expression represents lists of strings. E.g. output of tokenize. + + Parameters + ---------- + lower + Lower percentile. If a word's frequency is < this, it will be removed. + upper + Upper percentile. If a word's frequency is > this, it will be removed. + parallel + Whether to run word count in parallel. It is not recommended when you are in a group_by + context. + """ + + name = self._expr.meta.output_name(raise_if_undetermined=False) + vc = self._expr.list.explode().value_counts(parallel=parallel).sort() + lo = vc.struct.field("counts").quantile(lower) + u = vc.struct.field("counts").quantile(upper) + remove = ( + vc.filter((vc.struct.field("counts") < lo) | (vc.struct.field("counts") > u)).struct.field(name).implode() + ) + + return self._expr.list.set_difference(remove) + + def snowball(self, no_stopwords: bool = True, parallel: bool = False) -> pl.Expr: + """ + Applies the snowball stemmer to the column. The column is supposed to be a column of single words. 
diff --git a/src/num_ext/expressions.rs b/src/num_ext/expressions.rs index c092663f..3a8815c7 100644 --- a/src/num_ext/expressions.rs +++ b/src/num_ext/expressions.rs @@ -1,12 +1,20 @@ use faer::{prelude::*, MatRef}; use faer::{IntoFaer, IntoNdarray}; -// use faer::polars::{polars_to_faer_f64, Frame}; use ndarray::{Array1, Array2}; use num; +use num::traits::Inv; use polars::prelude::*; use polars_core::prelude::arity::binary_elementwise_values; use pyo3_polars::derive::polars_expr; use rustfft::FftPlanner; +use hashbrown::HashSet; + +// use faer::polars::{polars_to_faer_f64, Frame}; + +// fn numeric_output(input_fields: &[Field]) -> PolarsResult { +// let field = input_fields[0].clone(); +// Ok(field) +// } fn complex_output(_: &[Field]) -> PolarsResult { let real = Field::new("re", DataType::Float64); @@ -58,6 +66,114 @@ fn pl_lcm(inputs: &[Series]) -> PolarsResult { } } + +fn fast_exp_single(s:Series, n:i32) -> Series { + + if n == 0 { + let ss = s.f64().unwrap(); + let out:Float64Chunked = ss.apply_values(|x| { + if x == 0. { + f64::NAN + } else if x.is_infinite() | x.is_nan() { + x + } else { + 1.0 + } + }); + return out.into_series() + } else if n < 0 { + return fast_exp_single(1.div(&s), -n) + } + + let mut ss = s.clone(); + let mut m = n; + let mut y = Series::from_vec("", vec![1_f64; s.len()]); + while m > 0 { + if m % 2 == 1 { + y = &y * &ss; + } + ss = &ss * &ss; + m >>= 1; + } + y + + } + + #[inline] + fn _fast_exp_pairwise(x:f64, n:u32) -> f64 { + + let mut m = n; + let mut x = x; + let mut y:f64 = 1.0; + while m > 0 { + if m % 2 == 1 { + y *= x; + } + x *= x; + m >>= 1; + } + y + +} + +#[inline] +fn fast_exp_pairwise(x:f64, n:i32) -> f64 { + + if n == 0 { + if x == 0. { // 0^0 is NaN + return f64::NAN + } else { + return 1. 
+ } + } else if n < 0 { + return _fast_exp_pairwise(x.inv(), (-n) as u32) + } + _fast_exp_pairwise(x, n as u32) + +} + + +#[polars_expr(output_type=Float64)] +fn pl_fast_exp(inputs: &[Series]) -> PolarsResult { + + let s = inputs[0].clone(); + let exp = inputs[1].i32()?; + + if exp.len() == 1 { + let n = exp.get(0).unwrap(); + if s.dtype().is_numeric() { + let ss = s.cast(&DataType::Float64)?; + Ok(fast_exp_single(ss, n)) + } else { + Err(PolarsError::ComputeError( + "Input column type must be numeric.".into(), + )) + } + } else if s.len() == exp.len() { + if s.dtype().is_numeric() { + if s.dtype() == &DataType::Float64 { + let ca = s.f64()?; + let out:Float64Chunked = binary_elementwise_values(ca, exp, fast_exp_pairwise); + Ok(out.into_series()) + } else { + let t = s.cast(&DataType::Float64)?; + let ca = t.f64()?; + let out:Float64Chunked = binary_elementwise_values(ca, exp, fast_exp_pairwise); + Ok(out.into_series()) + } + } else { + Err(PolarsError::ComputeError( + "Input column type must be numeric.".into(), + )) + } + } else { + Err(PolarsError::ShapeMismatch( + "Inputs must have the same length.".into(), + )) + } + +} + // Use QR to solve fn faer_lstsq_qr(x: MatRef, y: MatRef) -> Result, String> { let qr = x.qr(); @@ -85,7 +201,8 @@ fn pl_lstsq(inputs: &[Series]) -> PolarsResult { let add_bias = inputs[1].bool()?; let add_bias: bool = add_bias.get(0).unwrap(); // y - let y = inputs[0].f64()?; + let y = inputs[0].rechunk(); // if not contiguous, this will do work. 
Otherwise, just a clone + let y = y.f64()?; let y = y.to_ndarray()?.into_shape((nrows, 1)).unwrap(); let y = y.view().into_faer(); @@ -93,9 +210,9 @@ fn pl_lstsq(inputs: &[Series]) -> PolarsResult { let mut vec_series: Vec = Vec::with_capacity(inputs[2..].len() + 1); for (i, s) in inputs[2..].iter().enumerate() { let t: Series = match s.dtype() { - DataType::Float64 => s.clone().with_name(&i.to_string()), + DataType::Float64 => s.rechunk().with_name(&i.to_string()), _ => { - let t = s.clone().cast(&DataType::Float64)?; + let t = s.rechunk().cast(&DataType::Float64)?; t.with_name(&i.to_string()) } }; @@ -213,3 +330,69 @@ fn pl_fft(inputs: &[Series]) -> PolarsResult { Ok(fft_struct) } + +#[polars_expr(output_type=Float64)] +fn pl_jaccard(inputs: &[Series]) -> PolarsResult { + + let include_null = inputs[2].bool()?; + let include_null = include_null.get(0).unwrap(); + + let (s1, s2) = if include_null { + (inputs[0].clone(), inputs[1].clone()) + } else { + let t1 = inputs[0].clone(); + let t2 = inputs[1].clone(); + (t1.drop_nulls(), t2.drop_nulls()) + }; + + // let parallel = inputs[3].bool()?; + // let parallel = parallel.get(0).unwrap(); + + if s1.dtype() != s2.dtype() { + return Err(PolarsError::ComputeError( + "Input column must have the same type.".into(), + )) + } + + let (n1, n2, intersection) = + if s1.dtype().is_integer() { + let ca1 = s1.cast(&DataType::Int64)?; + let ca2 = s2.cast(&DataType::Int64)?; + let ca1 = ca1.i64()?; + let ca2 = ca2.i64()?; + + let hs1: HashSet> = HashSet::from_iter(ca1); + let hs2: HashSet> = HashSet::from_iter(ca2); + let n1 = hs1.len(); + let n2 = hs2.len(); + + let intersection = hs1.intersection(&hs2); + + (n1, n2, intersection.count()) + + } else if s1.dtype() == &DataType::Utf8 { + let ca1 = s1.utf8()?; + let ca2 = s2.utf8()?; + + let hs1: HashSet> = HashSet::from_iter(ca1); + let hs2: HashSet> = HashSet::from_iter(ca2); + let n1 = hs1.len(); + let n2 = hs2.len(); + + let intersection = hs1.intersection(&hs2); + + (n1, n2, 
intersection.count()) + + } else { + return Err(PolarsError::ComputeError( + "Jaccard similarity can only be computed for integer/str columns.".into(), + )) + }; + + let out: Series = Series::from_iter([ + intersection as f64 / (n1 + n2 - intersection) as f64 + ]); + + Ok(out) + +} diff --git a/tests/test.ipynb b/tests/test.ipynb new file mode 100644 index 00000000..ac65ee65 --- /dev/null +++ b/tests/test.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "529f4422-5c3a-4bd6-abe0-a15edfc62abb", + "metadata": {}, + "outputs": [], + "source": [ + "from polars_ds import NumExt, StrExt\n", + "import polars as pl\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "3aef5c69-fff3-4779-9b58-f939d725f0b0", + "metadata": {}, + "source": [ + "# Num Extensions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "430fec01-5d0b-422f-b099-c86037512b6d", + "metadata": {}, + "outputs": [], + "source": [ + "size = 100_000\n", + "df = pl.DataFrame(\n", + " {\n", + " \"f\": np.sin(list(range(size))),\n", + " \"dummy\": [\"a\"] * (size // 2) + [\"b\"] * (size // 2),\n", + " \"a\": np.random.random(size=size),\n", + " \"b\": np.random.random(size=size),\n", + " \"x1\": pl.Series(range(size), dtype=pl.Int32),\n", + " \"x0\": pl.Series(range(size), dtype=pl.Int32),\n", + " \"x2\": pl.Series(range(size, size + size), dtype=pl.Int32),\n", + " \"y\": range(-size, 0),\n", + " }\n", + ")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26b9bf8c-5007-4571-b7f7-d04663b94e76", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbf1f9b6-20f1-49b7-9cb5-7ad4d57ca819", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68a5a972-ab9b-410a-a717-2124f21346a5", + "metadata": {}, + "outputs": [], + "source": [ + "from polars.testing import 
assert_frame_equal\n", + "\n", + "f1 = df.select(pl.col(\"f\").num_ext.powi(100_000))\n", + "f2 = df.select(pl.col(\"f\").pow(100_000))\n", + "assert_frame_equal(f1, f2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed14e014-2ff6-40cf-abb6-7bf2cf23c586", + "metadata": {}, + "outputs": [], + "source": [ + "%timeit df.select(pl.col(\"f\").pow(100_000))\n", + "%timeit df.select(pl.col(\"f\").num_ext.powi(100_000))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "037ee859-b668-4590-bc41-7d34f9cf3438", + "metadata": {}, + "outputs": [], + "source": [ + "f1 = df.select(pl.col(\"f\").num_ext.powi(8))\n", + "f2 = df.select(pl.col(\"f\").pow(8))\n", + "assert_frame_equal(f1, f2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf7b32b9-0d86-4bc6-8a8e-1971e402df38", + "metadata": {}, + "outputs": [], + "source": [ + "%timeit df.select(pl.col(\"f\").pow(8))\n", + "%timeit df.select(pl.col(\"f\").num_ext.powi(8))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a0d7ed9-06d4-4a4d-ae2c-45002af870be", + "metadata": {}, + "outputs": [], + "source": [ + "f1 = df.select(pl.col(\"f\").num_ext.powi(16))\n", + "f2 = df.select(pl.col(\"f\").pow(16))\n", + "assert_frame_equal(f1, f2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38fe8a90-06e4-4dce-bf2e-cb8622b87e9d", + "metadata": {}, + "outputs": [], + "source": [ + "%timeit df.select(pl.col(\"f\").pow(16))\n", + "%timeit df.select(pl.col(\"f\").num_ext.powi(16))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "366fd5a7-d7ce-4632-b793-6bf8ce9157eb", + "metadata": {}, + "outputs": [], + "source": [ + "# f1 = df.select(pl.col(\"f\").pow(pl.col(\"x1\")))\n", + "# f2 = df.select(pl.col(\"f\").num_ext.powi(pl.col(\"x1\")))\n", + "# assert_frame_equal(\n", + "# f1, f2\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a20e7cfa-ff4a-4af1-8a9e-75d0d03aa1b3", + 
"metadata": {}, + "outputs": [], + "source": [ + "%timeit df.select(pl.col(\"f\").pow(pl.col(\"x1\")))\n", + "%timeit df.select(pl.col(\"f\").num_ext.powi(pl.col(\"x1\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1600edff-3187-4ee8-aa7f-cb1ea7d5ef32", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b20356ee-03be-4afa-af38-2eaadcd0ff20", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e8ee876-e049-477b-8eee-09c8cb023415", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93389503-eee4-4623-bd9c-673f298387e0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a79e7d30-c57c-448d-93cd-e497de702610", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "416d5346-e75b-4769-a953-e898d6a4d84c", + "metadata": {}, + "outputs": [], + "source": [ + "# FFT\n", + "df.select(pl.col(\"f\").num_ext.fft()).unnest(\"f\").head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed47b643-6bcc-43f6-9a25-82168c33e7fd", + "metadata": {}, + "outputs": [], + "source": [ + "# Least Square (Linear Regression)\n", + "df.select(pl.col(\"y\").num_ext.lstsq(pl.col(\"x1\"), pl.col(\"x2\"), add_bias=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e9fb061-340d-423d-9107-772387006ff2", + "metadata": {}, + "outputs": [], + "source": [ + "df.group_by(\"dummy\").agg(pl.col(\"y\").num_ext.lstsq(pl.col(\"x1\"), pl.col(\"x2\"), add_bias=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8fda8ca-57e7-4e02-a3f0-283ecce66a59", + "metadata": {}, + "outputs": [], + "source": [ + "# Conditional Entropy, should be 0 because x1 is an ID\n", + 
"df.select(pl.col(\"y\").num_ext.cond_entropy(pl.col(\"x1\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf55f007-a7c2-4a78-a93b-4d83bfefe95c", + "metadata": {}, + "outputs": [], + "source": [ + "# t statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b78e8775-c50e-4d1f-a482-a5f76a358a9b", + "metadata": {}, + "outputs": [], + "source": [ + "df.select(pl.col(\"a\").num_ext.t_2samp(pl.col(\"b\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf70afa1-28f9-4227-a58f-aa49ed722e4a", + "metadata": {}, + "outputs": [], + "source": [ + "df.select(pl.col(\"a\").num_ext.welch_t(pl.col(\"b\"), return_df=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7c22e5e-b724-4ed9-827f-1a4f41870b8c", + "metadata": {}, + "outputs": [], + "source": [ + "df.group_by(\"dummy\").agg(pl.col(\"f\").num_ext.t_2samp(pl.col(\"b\")).alias(\"t\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85d0d094-3c4c-4230-a589-1027c5690162", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "a8d7c6e3-0f1d-45f0-9fdb-cdb303b98556", + "metadata": {}, + "source": [ + "# Str Extension" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54ad36f9-264e-4a49-bf36-936639440edf", + "metadata": {}, + "outputs": [], + "source": [ + "size = 100_000\n", + "df = pl.DataFrame({\"sen\": [\"Hello, world! 
I'm going to church.\"] * size, \"word\": [\"words\", \"word\"] * (size // 2)})\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee123a7e-7f9b-4f48-a5d5-6354799201ab", + "metadata": {}, + "outputs": [], + "source": [ + "# Tokenize\n", + "df2 = df.select(\n", + " pl.col(\"sen\").str.to_lowercase().str_ext.tokenize() # .explode().unique()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33fd3141-8245-4792-a8a4-8c06713603b2", + "metadata": {}, + "outputs": [], + "source": [ + "df2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4f45d3d-d3b9-4fde-9ed5-b3d01d0fa1ba", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/test_ext.py b/tests/test_ext.py index 3d38a2f8..85cc7a83 100644 --- a/tests/test_ext.py +++ b/tests/test_ext.py @@ -1,328 +1,210 @@ import pytest import polars as pl -from polars_ds.extensions import NumExt, StrExt # noqa: F401 +import math +from polars_ds import NumExt, StrExt # noqa: F401 from polars.testing import assert_frame_equal + @pytest.mark.parametrize( "df, other, res", [ + (pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 2, 10]}), 3, pl.DataFrame({"a": [1, 1, 3, 1, 1]})), ( - pl.DataFrame({ - "a": [1,2,3,4,5], - "b": [1,2,2,2,10] - }), - 3, - pl.DataFrame({ - "a": [1,1,3,1,1] - }) - ), - ( - pl.DataFrame({ - "a": [1,2,3,4,5], - "b": [1,2,2,2,10] - }), + pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 2, 10]}), pl.col("b"), - pl.DataFrame({ - "a": [1,2,1,2,5] - }) + pl.DataFrame({"a": [1, 
2, 1, 2, 5]}), ), - ] - + ], ) def test_gcd(df, other, res): - - assert_frame_equal( - df.select( - pl.col("a").num_ext.gcd(other) - ), - res - ) + assert_frame_equal(df.select(pl.col("a").num_ext.gcd(other)), res) + + assert_frame_equal(df.lazy().select(pl.col("a").num_ext.gcd(other)).collect(), res) - assert_frame_equal( - df.lazy().select( - pl.col("a").num_ext.gcd(other) - ).collect(), - res - ) @pytest.mark.parametrize( "df, other, res", [ + (pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 2, 10]}), 3, pl.DataFrame({"a": [3, 6, 3, 12, 15]})), ( - pl.DataFrame({ - "a": [1,2,3,4,5], - "b": [1,2,2,2,10] - }), - 3, - pl.DataFrame({ - "a": [3,6,3,12,15] - }) - ), - ( - pl.DataFrame({ - "a": [1,2,3,4,5], - "b": [1,2,2,2,10] - }), + pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 2, 10]}), pl.col("b"), - pl.DataFrame({ - "a": [1,2,6,4,10] - }) + pl.DataFrame({"a": [1, 2, 6, 4, 10]}), ), - ] - + ], ) def test_lcm(df, other, res): - - assert_frame_equal( - df.select( - pl.col("a").num_ext.lcm(other) - ), - res - ) + assert_frame_equal(df.select(pl.col("a").num_ext.lcm(other)), res) + + assert_frame_equal(df.lazy().select(pl.col("a").num_ext.lcm(other)).collect(), res) - assert_frame_equal( - df.lazy().select( - pl.col("a").num_ext.lcm(other) - ).collect(), - res - ) @pytest.mark.parametrize( - "df, res", + "df, p", [ ( - pl.DataFrame({ - "y":[1,0,1,1,1,0,0,1], - "a": ["a", "b", "c", "a", "b", "c", "a", "a"] - }), - pl.DataFrame({ - "y": [0.6277411625893767] - }) + pl.DataFrame({"a": [0.1 + x / 1000 for x in range(1000)], "b": pl.Series(range(1000), dtype=pl.Int32)}), + pl.col("b"), ), + (pl.DataFrame({"a": [0.1 + x / 1000 for x in range(1000)], "b": pl.Series(range(1000), dtype=pl.Int32)}), 10), + ( + pl.DataFrame( + { + "a": [math.inf, math.nan], + } + ), + 2, + ), + ], +) +def test_powi(df, p): + # The reason I avoided 0 is that + # In polars 0^0 = 1, which is wrong. + # In polars-ds, this will be mapped to NaN. 
+ assert_frame_equal(df.select(pl.col("a").num_ext.powi(p)), df.select(pl.col("a").pow(p))) + + +@pytest.mark.parametrize( + "df, res", + [ ( - pl.DataFrame({ - "y":[1] * 8, - "a": ["a", "b", "c", "a", "b", "c", "a", "a"] - }), - pl.DataFrame({ - "y": [-0.0] - }) + pl.DataFrame({"y": [1, 0, 1, 1, 1, 0, 0, 1], "a": ["a", "b", "c", "a", "b", "c", "a", "a"]}), + pl.DataFrame({"y": [0.6277411625893767]}), ), - ] + (pl.DataFrame({"y": [1] * 8, "a": ["a", "b", "c", "a", "b", "c", "a", "a"]}), pl.DataFrame({"y": [-0.0]})), + ], ) def test_cond_entropy(df, res): - - assert_frame_equal( - df.select( - pl.col("y").num_ext.cond_entropy(pl.col("a")) - ), - res - ) + assert_frame_equal(df.select(pl.col("y").num_ext.cond_entropy(pl.col("a"))), res) + + assert_frame_equal(df.lazy().select(pl.col("y").num_ext.cond_entropy(pl.col("a"))).collect(), res) - assert_frame_equal( - df.lazy().select( - pl.col("y").num_ext.cond_entropy(pl.col("a")) - ).collect(), - res - ) # Hard to write generic tests because ncols can vary in X def test_lstsq(): + df = pl.DataFrame({"y": [1, 2, 3, 4, 5], "a": [2, 3, 4, 5, 6], "b": [-1, -1, -1, -1, -1]}) + res = pl.DataFrame({"y": [[1.0, 1.0]]}) + assert_frame_equal(df.select(pl.col("y").num_ext.lstsq(pl.col("a"), pl.col("b"), add_bias=False)), res) - df = pl.DataFrame({ - "y":[1,2,3,4,5], - "a": [2,3,4,5,6], - "b": [-1,-1,-1,-1,-1] - }) - res = pl.DataFrame({ - "y": [[1.0, 1.0]] - }) - assert_frame_equal( - df.select( - pl.col("y").num_ext.lstsq(pl.col("a"), pl.col("b"), add_bias = False) - ), - res + df = pl.DataFrame( + { + "y": [1, 2, 3, 4, 5], + "a": [2, 3, 4, 5, 6], + } ) + res = pl.DataFrame({"y": [[1.0, -1.0]]}) + assert_frame_equal(df.select(pl.col("y").num_ext.lstsq(pl.col("a"), add_bias=True)), res) - df = pl.DataFrame({ - "y":[1,2,3,4,5], - "a": [2,3,4,5,6], - }) - res = pl.DataFrame({ - "y": [[1.0, -1.0]] - }) - assert_frame_equal( - df.select( - pl.col("y").num_ext.lstsq(pl.col("a"), add_bias = True) - ), - res - ) @pytest.mark.parametrize( 
"df, res", [ ( - pl.DataFrame({ - "a": ["thanks","thank","thankful"] - }), - pl.DataFrame({ - "a": ["thank","thank","thank"] - }) + pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [2, 3, 4, 5, 6]}), + pl.DataFrame({"j": [2 / 3]}), ), + ], +) +def test_col_jaccard(df, res): + assert_frame_equal(df.select(pl.col("a").num_ext.jaccard(pl.col("b")).alias("j")), res) + + assert_frame_equal(df.lazy().select(pl.col("a").num_ext.jaccard(pl.col("b")).alias("j")).collect(), res) + + +@pytest.mark.parametrize( + "df, res", + [ + (pl.DataFrame({"a": ["thanks", "thank", "thankful"]}), pl.DataFrame({"a": ["thank", "thank", "thank"]})), ( - pl.DataFrame({ - "a": ["playful","playing", "play", "played", "plays"] - }), - pl.DataFrame({ - "a": ["play","play", "play", "play", "play"] - }) + pl.DataFrame({"a": ["playful", "playing", "play", "played", "plays"]}), + pl.DataFrame({"a": ["play", "play", "play", "play", "play"]}), ), - ] + ], ) def test_snowball(df, res): - - assert_frame_equal( - df.select( - pl.col("a").str_ext.snowball() - ), - res - ) + assert_frame_equal(df.select(pl.col("a").str_ext.snowball()), res) - assert_frame_equal( - df.select( - pl.col("a").str_ext.snowball(parallel=True) - ), - res - ) + assert_frame_equal(df.select(pl.col("a").str_ext.snowball(parallel=True)), res) + + assert_frame_equal(df.lazy().select(pl.col("a").str_ext.snowball()).collect(), res) - assert_frame_equal( - df.lazy().select( - pl.col("a").str_ext.snowball() - ).collect(), - res - ) @pytest.mark.parametrize( "df, res", [ ( - pl.DataFrame({ - "a":["karolin", "karolin", "kathrin", "0000", "2173896"], - "b":["kathrin", "kerstin", "kerstin", "1111", "2233796"] - }), - pl.DataFrame({ - "a": pl.Series([3,3,4,4,3], dtype=pl.UInt32) - }) - ), - ] + pl.DataFrame( + { + "a": ["karolin", "karolin", "kathrin", "0000", "2173896"], + "b": ["kathrin", "kerstin", "kerstin", "1111", "2233796"], + } + ), + pl.DataFrame({"a": pl.Series([3, 3, 4, 4, 3], dtype=pl.UInt32)}), + ), + ], ) def test_hamming_dist(df, res): + 
assert_frame_equal(df.select(pl.col("a").str_ext.hamming_dist(pl.col("b"))), res) + assert_frame_equal(df.select(pl.col("a").str_ext.hamming_dist(pl.col("b"), parallel=True)), res) + assert_frame_equal(df.lazy().select(pl.col("a").str_ext.hamming_dist(pl.col("b"))).collect(), res) - assert_frame_equal( - df.select( - pl.col("a").str_ext.hamming_dist(pl.col("b")) - ) - , res - ) - assert_frame_equal( - df.select( - pl.col("a").str_ext.hamming_dist(pl.col("b"), parallel=True) - ) - , res - ) - assert_frame_equal( - df.lazy().select( - pl.col("a").str_ext.hamming_dist(pl.col("b")) - ).collect() - , res - ) @pytest.mark.parametrize( "df, res", [ ( - pl.DataFrame({ - "a":["kitten", "mary", "may"], - "b":["sitting", "merry", "mayer"] - }), - pl.DataFrame({ - "a": pl.Series([3,2,2], dtype=pl.UInt32) - }) + pl.DataFrame({"a": ["kitten", "mary", "may"], "b": ["sitting", "merry", "mayer"]}), + pl.DataFrame({"a": pl.Series([3, 2, 2], dtype=pl.UInt32)}), ), - ] + ], ) def test_levenshtein_dist(df, res): + assert_frame_equal(df.select(pl.col("a").str_ext.levenshtein_dist(pl.col("b"))), res) + assert_frame_equal(df.select(pl.col("a").str_ext.levenshtein_dist(pl.col("b"), parallel=True)), res) assert_frame_equal( - df.select( - pl.col("a").str_ext.levenshtein_dist(pl.col("b")) - ) - , res + df.select(pl.col("a").str_ext.levenshtein_dist("may")), + pl.DataFrame({"a": pl.Series([6, 1, 0], dtype=pl.UInt32)}), ) + assert_frame_equal(df.lazy().select(pl.col("a").str_ext.levenshtein_dist(pl.col("b"))).collect(), res) - assert_frame_equal( - df.select( - pl.col("a").str_ext.levenshtein_dist(pl.col("b"), parallel=True) - ) - , res - ) - assert_frame_equal( - df.select( - pl.col("a").str_ext.levenshtein_dist("may") - ) - , pl.DataFrame({ - "a": pl.Series([6,1,0], dtype=pl.UInt32) - }) - ) - assert_frame_equal( - df.lazy().select( - pl.col("a").str_ext.levenshtein_dist(pl.col("b")) - ).collect() - , res - ) @pytest.mark.parametrize( "df, size, res", [ ( - pl.DataFrame({ - "a":["apple", 
"test", "moon"], - "b":["let", "tests", "sun"] - }) - , 2 - , pl.DataFrame({ - "a": pl.Series([0.2,0.75,0.], dtype=pl.Float64) - }) + pl.DataFrame({"a": ["apple", "test", "moon"], "b": ["let", "tests", "sun"]}), + 2, + pl.DataFrame({"a": pl.Series([0.2, 0.75, 0.0], dtype=pl.Float64)}), ), ( - pl.DataFrame({ - "a":["apple", "test", "moon"], - "b":["let", "tests", "sun"] - }) - , 3 - , pl.DataFrame({ - "a": pl.Series([0.0, 2/3 , 0.0], dtype=pl.Float64) - }) + pl.DataFrame({"a": ["apple", "test", "moon"], "b": ["let", "tests", "sun"]}), + 3, + pl.DataFrame({"a": pl.Series([0.0, 2 / 3, 0.0], dtype=pl.Float64)}), ), - ] + ], ) def test_str_jaccard(df, size, res): - - assert_frame_equal( - df.select( - pl.col("a").str_ext.str_jaccard(pl.col("b"), substr_size=size) - ) - , res - ) + assert_frame_equal(df.select(pl.col("a").str_ext.str_jaccard(pl.col("b"), substr_size=size)), res) + assert_frame_equal(df.select(pl.col("a").str_ext.str_jaccard(pl.col("b"), substr_size=size, parallel=True)), res) assert_frame_equal( - df.select( - pl.col("a").str_ext.str_jaccard(pl.col("b"), substr_size=size, parallel=True) - ) - , res + df.lazy().select(pl.col("a").str_ext.str_jaccard(pl.col("b"), substr_size=size, parallel=True)).collect(), res ) - assert_frame_equal( - df.lazy().select( - pl.col("a").str_ext.str_jaccard(pl.col("b"), substr_size=size, parallel=True) - ).collect() - , res - ) \ No newline at end of file + + +@pytest.mark.parametrize( + "df, lower, upper, res", + [ + ( + pl.DataFrame({"a": [["a", "b", "c"], ["a", "b"], ["a"]]}), + 0.05, + 0.6, + pl.DataFrame({"a": [["b", "c"], ["b"], []]}), + # 0.05 is count of 1, nothing has < 1 count. 0.6 is 2. "a" has > 2 count + # so a is removed. + ), + ], +) +def test_freq_removal(df, lower, upper, res): + ans = df.select(pl.col("a").str_ext.freq_removal(lower=lower, upper=upper).list.sort()) + assert_frame_equal(ans, res)