diff --git a/docs/pipeline.md b/docs/pipeline.md
new file mode 100644
index 00000000..f69c21c6
--- /dev/null
+++ b/docs/pipeline.md
@@ -0,0 +1,3 @@
+## Polars Native Machine Learning Pipeline
+
+::: polars_ds.pipeline
\ No newline at end of file
diff --git a/docs/sample.md b/docs/sample.md
new file mode 100644
index 00000000..a6a2b493
--- /dev/null
+++ b/docs/sample.md
@@ -0,0 +1,3 @@
+## Sampling and Splitting
+
+::: polars_ds.sample
\ No newline at end of file
diff --git a/examples/sample_and_split.ipynb b/examples/sample_and_split.ipynb
index a59f765f..3906b586 100644
--- a/examples/sample_and_split.ipynb
+++ b/examples/sample_and_split.ipynb
@@ -42,7 +42,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (5, 8)
row_num | uniform_1 | uniform_2 | exp | normal | fat_normal | flags | category |
---|
i64 | f64 | f64 | f64 | f64 | f64 | i32 | str |
0 | 1.864966 | 0.983112 | 2.134098 | 0.201208 | -510.167817 | 1 | "A" |
1 | 3.681365 | 0.072616 | 0.24552 | -2.180395 | -1230.797029 | 2 | "A" |
2 | 7.138163 | 0.068923 | 0.498513 | -1.737763 | -914.757436 | 2 | "A" |
3 | 9.2411 | 0.006129 | 0.670527 | -1.500905 | -1025.144372 | 1 | "A" |
4 | 7.874972 | 0.638764 | 2.159589 | 0.490217 | 1329.546535 | 0 | "A" |
"
+ "shape: (5, 8)row_num | uniform_1 | uniform_2 | exp | normal | fat_normal | flags | category |
---|
i64 | f64 | f64 | f64 | f64 | f64 | i32 | str |
0 | 0.101301 | 0.241236 | 0.068629 | -1.546608 | -1820.064986 | 0 | "A" |
1 | 7.763778 | 0.688527 | 1.564067 | -0.219875 | 2842.128922 | 0 | "A" |
2 | 6.692104 | 0.302039 | 2.184995 | -2.038565 | -107.693276 | 0 | "A" |
3 | 8.069798 | 0.589782 | 0.047991 | -1.029594 | 52.045207 | 1 | "A" |
4 | 4.709925 | 0.289922 | 0.538721 | 1.08094 | 711.807154 | 1 | "A" |
"
],
"text/plain": [
"shape: (5, 8)\n",
@@ -51,11 +51,11 @@
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ str │\n",
"╞═════════╪═══════════╪═══════════╪══════════╪═══════════╪══════════════╪═══════╪══════════╡\n",
- "│ 0 ┆ 1.864966 ┆ 0.983112 ┆ 2.134098 ┆ 0.201208 ┆ -510.167817 ┆ 1 ┆ A │\n",
- "│ 1 ┆ 3.681365 ┆ 0.072616 ┆ 0.24552 ┆ -2.180395 ┆ -1230.797029 ┆ 2 ┆ A │\n",
- "│ 2 ┆ 7.138163 ┆ 0.068923 ┆ 0.498513 ┆ -1.737763 ┆ -914.757436 ┆ 2 ┆ A │\n",
- "│ 3 ┆ 9.2411 ┆ 0.006129 ┆ 0.670527 ┆ -1.500905 ┆ -1025.144372 ┆ 1 ┆ A │\n",
- "│ 4 ┆ 7.874972 ┆ 0.638764 ┆ 2.159589 ┆ 0.490217 ┆ 1329.546535 ┆ 0 ┆ A │\n",
+ "│ 0 ┆ 0.101301 ┆ 0.241236 ┆ 0.068629 ┆ -1.546608 ┆ -1820.064986 ┆ 0 ┆ A │\n",
+ "│ 1 ┆ 7.763778 ┆ 0.688527 ┆ 1.564067 ┆ -0.219875 ┆ 2842.128922 ┆ 0 ┆ A │\n",
+ "│ 2 ┆ 6.692104 ┆ 0.302039 ┆ 2.184995 ┆ -2.038565 ┆ -107.693276 ┆ 0 ┆ A │\n",
+ "│ 3 ┆ 8.069798 ┆ 0.589782 ┆ 0.047991 ┆ -1.029594 ┆ 52.045207 ┆ 1 ┆ A │\n",
+ "│ 4 ┆ 4.709925 ┆ 0.289922 ┆ 0.538721 ┆ 1.08094 ┆ 711.807154 ┆ 1 ┆ A │\n",
"└─────────┴───────────┴───────────┴──────────┴───────────┴──────────────┴───────┴──────────┘"
]
},
@@ -85,7 +85,7 @@
{
"data": {
"text/plain": [
- "['row_num', 'fat_normal', 'flags']"
+ "['row_num', 'uniform_1', 'fat_normal']"
]
},
"execution_count": 3,
@@ -112,27 +112,27 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (60_000, 8)row_num | uniform_1 | uniform_2 | exp | normal | fat_normal | flags | category |
---|
i64 | f64 | f64 | f64 | f64 | f64 | i32 | str |
0 | 1.864966 | 0.983112 | 2.134098 | 0.201208 | -510.167817 | 1 | "A" |
1 | 3.681365 | 0.072616 | 0.24552 | -2.180395 | -1230.797029 | 2 | "A" |
2 | 7.138163 | 0.068923 | 0.498513 | -1.737763 | -914.757436 | 2 | "A" |
4 | 7.874972 | 0.638764 | 2.159589 | 0.490217 | 1329.546535 | 0 | "A" |
5 | 1.479464 | 0.703649 | 0.48033 | 0.550675 | 186.879146 | 1 | "A" |
6 | 11.354059 | 0.881735 | 2.399495 | -0.720839 | -376.389466 | 2 | "A" |
7 | 7.981127 | 0.199884 | 0.335183 | -0.223638 | -168.936382 | 2 | "A" |
8 | 3.539316 | 0.618786 | 2.968125 | -0.066433 | 3852.290115 | 0 | "A" |
12 | 2.881549 | 0.306653 | 1.828217 | 0.594723 | 1189.461612 | 0 | "A" |
13 | 11.286761 | 0.200552 | 4.836837 | 0.725427 | -18.039201 | 2 | "A" |
16 | 7.879337 | 0.194351 | 1.395224 | -1.621337 | 253.488052 | 0 | "A" |
17 | 6.88781 | 0.436873 | 2.56461 | 0.799571 | -573.229085 | 2 | "A" |
… | … | … | … | … | … | … | … |
99984 | 3.175364 | 0.521214 | 2.621609 | -0.259841 | -207.322052 | 1 | "C" |
99985 | 2.982497 | 0.046959 | 2.013501 | -1.38318 | -449.496094 | 0 | "C" |
99986 | 9.715497 | 0.173048 | 7.787587 | 0.031314 | -745.802171 | 1 | "C" |
99987 | 0.464149 | 0.052577 | 0.251207 | -0.550988 | 211.994715 | 1 | "C" |
99988 | 2.90002 | 0.140729 | 0.39061 | 0.420128 | 543.348445 | 0 | "C" |
99989 | 7.725916 | 0.608843 | 1.518604 | 0.872585 | -1046.055152 | 1 | "C" |
99990 | 2.550447 | 0.136828 | 1.878146 | -0.443894 | 1299.166574 | 1 | "C" |
99993 | 6.344932 | 0.91468 | 0.077694 | 0.667014 | 274.197044 | 1 | "C" |
99995 | 7.751136 | 0.991215 | 0.341938 | 0.74724 | 468.091164 | 1 | "C" |
99996 | 8.932153 | 0.149342 | 2.017135 | 0.254913 | -1555.19188 | 0 | "C" |
99997 | 6.366458 | 0.825137 | 1.149404 | -0.653029 | 211.288954 | 1 | "C" |
99999 | 5.021382 | 0.405893 | 2.745867 | 0.800525 | 1572.618783 | 1 | "C" |
"
+ "shape: (60_000, 8)row_num | uniform_1 | uniform_2 | exp | normal | fat_normal | flags | category |
---|
i64 | f64 | f64 | f64 | f64 | f64 | i32 | str |
2 | 6.692104 | 0.302039 | 2.184995 | -2.038565 | -107.693276 | 0 | "A" |
3 | 8.069798 | 0.589782 | 0.047991 | -1.029594 | 52.045207 | 1 | "A" |
4 | 4.709925 | 0.289922 | 0.538721 | 1.08094 | 711.807154 | 1 | "A" |
7 | 9.679737 | 0.431594 | 1.131895 | -0.739637 | 1269.613996 | 1 | "A" |
8 | 10.803224 | 0.546317 | 3.25459 | -0.904231 | 243.572733 | 2 | "A" |
… | … | … | … | … | … | … | … |
99991 | 10.25385 | 0.632615 | 0.263188 | -0.388282 | 273.417495 | 2 | "C" |
99993 | 5.213348 | 0.199494 | 4.928223 | -0.362607 | 1729.126716 | 0 | "C" |
99994 | 9.441603 | 0.347907 | 0.318096 | -0.112797 | 242.457284 | 0 | "C" |
99998 | 7.374466 | 0.030731 | 3.606166 | -0.582265 | 1290.937356 | 0 | "C" |
99999 | 7.164853 | 0.399791 | 0.354686 | -0.999868 | 1678.190405 | 1 | "C" |
"
],
"text/plain": [
"shape: (60_000, 8)\n",
- "┌─────────┬───────────┬───────────┬──────────┬───────────┬──────────────┬───────┬──────────┐\n",
- "│ row_num ┆ uniform_1 ┆ uniform_2 ┆ exp ┆ normal ┆ fat_normal ┆ flags ┆ category │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
- "│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ str │\n",
- "╞═════════╪═══════════╪═══════════╪══════════╪═══════════╪══════════════╪═══════╪══════════╡\n",
- "│ 0 ┆ 1.864966 ┆ 0.983112 ┆ 2.134098 ┆ 0.201208 ┆ -510.167817 ┆ 1 ┆ A │\n",
- "│ 1 ┆ 3.681365 ┆ 0.072616 ┆ 0.24552 ┆ -2.180395 ┆ -1230.797029 ┆ 2 ┆ A │\n",
- "│ 2 ┆ 7.138163 ┆ 0.068923 ┆ 0.498513 ┆ -1.737763 ┆ -914.757436 ┆ 2 ┆ A │\n",
- "│ 4 ┆ 7.874972 ┆ 0.638764 ┆ 2.159589 ┆ 0.490217 ┆ 1329.546535 ┆ 0 ┆ A │\n",
- "│ 5 ┆ 1.479464 ┆ 0.703649 ┆ 0.48033 ┆ 0.550675 ┆ 186.879146 ┆ 1 ┆ A │\n",
- "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
- "│ 99993 ┆ 6.344932 ┆ 0.91468 ┆ 0.077694 ┆ 0.667014 ┆ 274.197044 ┆ 1 ┆ C │\n",
- "│ 99995 ┆ 7.751136 ┆ 0.991215 ┆ 0.341938 ┆ 0.74724 ┆ 468.091164 ┆ 1 ┆ C │\n",
- "│ 99996 ┆ 8.932153 ┆ 0.149342 ┆ 2.017135 ┆ 0.254913 ┆ -1555.19188 ┆ 0 ┆ C │\n",
- "│ 99997 ┆ 6.366458 ┆ 0.825137 ┆ 1.149404 ┆ -0.653029 ┆ 211.288954 ┆ 1 ┆ C │\n",
- "│ 99999 ┆ 5.021382 ┆ 0.405893 ┆ 2.745867 ┆ 0.800525 ┆ 1572.618783 ┆ 1 ┆ C │\n",
- "└─────────┴───────────┴───────────┴──────────┴───────────┴──────────────┴───────┴──────────┘"
+ "┌─────────┬───────────┬───────────┬──────────┬───────────┬─────────────┬───────┬──────────┐\n",
+ "│ row_num ┆ uniform_1 ┆ uniform_2 ┆ exp ┆ normal ┆ fat_normal ┆ flags ┆ category │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ str │\n",
+ "╞═════════╪═══════════╪═══════════╪══════════╪═══════════╪═════════════╪═══════╪══════════╡\n",
+ "│ 2 ┆ 6.692104 ┆ 0.302039 ┆ 2.184995 ┆ -2.038565 ┆ -107.693276 ┆ 0 ┆ A │\n",
+ "│ 3 ┆ 8.069798 ┆ 0.589782 ┆ 0.047991 ┆ -1.029594 ┆ 52.045207 ┆ 1 ┆ A │\n",
+ "│ 4 ┆ 4.709925 ┆ 0.289922 ┆ 0.538721 ┆ 1.08094 ┆ 711.807154 ┆ 1 ┆ A │\n",
+ "│ 7 ┆ 9.679737 ┆ 0.431594 ┆ 1.131895 ┆ -0.739637 ┆ 1269.613996 ┆ 1 ┆ A │\n",
+ "│ 8 ┆ 10.803224 ┆ 0.546317 ┆ 3.25459 ┆ -0.904231 ┆ 243.572733 ┆ 2 ┆ A │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ 99991 ┆ 10.25385 ┆ 0.632615 ┆ 0.263188 ┆ -0.388282 ┆ 273.417495 ┆ 2 ┆ C │\n",
+ "│ 99993 ┆ 5.213348 ┆ 0.199494 ┆ 4.928223 ┆ -0.362607 ┆ 1729.126716 ┆ 0 ┆ C │\n",
+ "│ 99994 ┆ 9.441603 ┆ 0.347907 ┆ 0.318096 ┆ -0.112797 ┆ 242.457284 ┆ 0 ┆ C │\n",
+ "│ 99998 ┆ 7.374466 ┆ 0.030731 ┆ 3.606166 ┆ -0.582265 ┆ 1290.937356 ┆ 0 ┆ C │\n",
+ "│ 99999 ┆ 7.164853 ┆ 0.399791 ┆ 0.354686 ┆ -0.999868 ┆ 1678.190405 ┆ 1 ┆ C │\n",
+ "└─────────┴───────────┴───────────┴──────────┴───────────┴─────────────┴───────┴──────────┘"
]
},
"execution_count": 4,
@@ -160,27 +160,27 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (30_000, 8)row_num | uniform_1 | uniform_2 | exp | normal | fat_normal | flags | category |
---|
i64 | f64 | f64 | f64 | f64 | f64 | i32 | str |
4 | 7.874972 | 0.638764 | 2.159589 | 0.490217 | 1329.546535 | 0 | "A" |
5 | 1.479464 | 0.703649 | 0.48033 | 0.550675 | 186.879146 | 1 | "A" |
6 | 11.354059 | 0.881735 | 2.399495 | -0.720839 | -376.389466 | 2 | "A" |
8 | 3.539316 | 0.618786 | 2.968125 | -0.066433 | 3852.290115 | 0 | "A" |
12 | 2.881549 | 0.306653 | 1.828217 | 0.594723 | 1189.461612 | 0 | "A" |
13 | 11.286761 | 0.200552 | 4.836837 | 0.725427 | -18.039201 | 2 | "A" |
22 | 2.833185 | 0.043957 | 3.999065 | -0.735753 | 357.525422 | 2 | "A" |
25 | 11.359116 | 0.191183 | 0.703892 | 0.195382 | -1028.061174 | 0 | "A" |
27 | 6.276564 | 0.892571 | 0.734094 | 0.648778 | -1995.465652 | 0 | "A" |
32 | 11.959182 | 0.665287 | 2.574619 | 1.856659 | -982.271182 | 1 | "A" |
34 | 6.493756 | 0.947386 | 1.745573 | 0.261902 | 598.127735 | 0 | "A" |
41 | 9.549829 | 0.69802 | 2.068939 | -1.506053 | 57.884184 | 2 | "A" |
… | … | … | … | … | … | … | … |
99965 | 8.612262 | 0.692854 | 3.455756 | 0.779895 | 433.966147 | 2 | "C" |
99966 | 3.343289 | 0.487011 | 3.656845 | 0.91173 | 870.313838 | 1 | "C" |
99968 | 4.45978 | 0.079741 | 3.194479 | -0.210791 | -843.718407 | 2 | "C" |
99971 | 9.711985 | 0.050049 | 0.078004 | 1.655786 | -1371.070708 | 1 | "C" |
99972 | 1.047484 | 0.808619 | 0.347894 | 0.834854 | -1020.64733 | 2 | "C" |
99977 | 8.589407 | 0.535741 | 0.131577 | 1.839748 | -104.372155 | 1 | "C" |
99980 | 9.513081 | 0.958727 | 1.197324 | 2.042775 | -1971.061272 | 1 | "C" |
99982 | 2.048097 | 0.19573 | 1.607711 | 0.141 | -525.757109 | 1 | "C" |
99984 | 3.175364 | 0.521214 | 2.621609 | -0.259841 | -207.322052 | 1 | "C" |
99991 | 0.155865 | 0.938876 | 1.293072 | -3.370048 | 677.127453 | 2 | "C" |
99995 | 7.751136 | 0.991215 | 0.341938 | 0.74724 | 468.091164 | 1 | "C" |
99997 | 6.366458 | 0.825137 | 1.149404 | -0.653029 | 211.288954 | 1 | "C" |
"
+ "shape: (30_000, 8)row_num | uniform_1 | uniform_2 | exp | normal | fat_normal | flags | category |
---|
i64 | f64 | f64 | f64 | f64 | f64 | i32 | str |
14 | 3.29052 | 0.62766 | 0.684437 | -0.114606 | 1440.324971 | 0 | "A" |
15 | 10.033849 | 0.555832 | 0.265698 | 1.894409 | -2266.65647 | 1 | "A" |
18 | 10.209092 | 0.411413 | 1.781309 | 1.502181 | 63.521515 | 1 | "A" |
23 | 5.461989 | 0.631279 | 2.069071 | -0.735686 | -1463.48885 | 0 | "A" |
27 | 8.902278 | 0.732549 | 2.877614 | -0.049382 | -1301.745218 | 0 | "A" |
… | … | … | … | … | … | … | … |
99974 | 6.541671 | 0.052039 | 2.622168 | -0.451139 | -1270.959364 | 2 | "C" |
99976 | 1.147642 | 0.885223 | 0.704189 | 0.61381 | 344.861659 | 1 | "C" |
99985 | 7.321052 | 0.76491 | 6.039978 | -0.407754 | 304.340042 | 1 | "C" |
99991 | 10.25385 | 0.632615 | 0.263188 | -0.388282 | 273.417495 | 2 | "C" |
99995 | 1.948428 | 0.923293 | 6.168104 | -0.151161 | 997.159514 | 0 | "C" |
"
],
"text/plain": [
"shape: (30_000, 8)\n",
- "┌─────────┬───────────┬───────────┬──────────┬───────────┬─────────────┬───────┬──────────┐\n",
- "│ row_num ┆ uniform_1 ┆ uniform_2 ┆ exp ┆ normal ┆ fat_normal ┆ flags ┆ category │\n",
- "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
- "│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ str │\n",
- "╞═════════╪═══════════╪═══════════╪══════════╪═══════════╪═════════════╪═══════╪══════════╡\n",
- "│ 4 ┆ 7.874972 ┆ 0.638764 ┆ 2.159589 ┆ 0.490217 ┆ 1329.546535 ┆ 0 ┆ A │\n",
- "│ 5 ┆ 1.479464 ┆ 0.703649 ┆ 0.48033 ┆ 0.550675 ┆ 186.879146 ┆ 1 ┆ A │\n",
- "│ 6 ┆ 11.354059 ┆ 0.881735 ┆ 2.399495 ┆ -0.720839 ┆ -376.389466 ┆ 2 ┆ A │\n",
- "│ 8 ┆ 3.539316 ┆ 0.618786 ┆ 2.968125 ┆ -0.066433 ┆ 3852.290115 ┆ 0 ┆ A │\n",
- "│ 12 ┆ 2.881549 ┆ 0.306653 ┆ 1.828217 ┆ 0.594723 ┆ 1189.461612 ┆ 0 ┆ A │\n",
- "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
- "│ 99982 ┆ 2.048097 ┆ 0.19573 ┆ 1.607711 ┆ 0.141 ┆ -525.757109 ┆ 1 ┆ C │\n",
- "│ 99984 ┆ 3.175364 ┆ 0.521214 ┆ 2.621609 ┆ -0.259841 ┆ -207.322052 ┆ 1 ┆ C │\n",
- "│ 99991 ┆ 0.155865 ┆ 0.938876 ┆ 1.293072 ┆ -3.370048 ┆ 677.127453 ┆ 2 ┆ C │\n",
- "│ 99995 ┆ 7.751136 ┆ 0.991215 ┆ 0.341938 ┆ 0.74724 ┆ 468.091164 ┆ 1 ┆ C │\n",
- "│ 99997 ┆ 6.366458 ┆ 0.825137 ┆ 1.149404 ┆ -0.653029 ┆ 211.288954 ┆ 1 ┆ C │\n",
- "└─────────┴───────────┴───────────┴──────────┴───────────┴─────────────┴───────┴──────────┘"
+ "┌─────────┬───────────┬───────────┬──────────┬───────────┬──────────────┬───────┬──────────┐\n",
+ "│ row_num ┆ uniform_1 ┆ uniform_2 ┆ exp ┆ normal ┆ fat_normal ┆ flags ┆ category │\n",
+ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
+ "│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ str │\n",
+ "╞═════════╪═══════════╪═══════════╪══════════╪═══════════╪══════════════╪═══════╪══════════╡\n",
+ "│ 14 ┆ 3.29052 ┆ 0.62766 ┆ 0.684437 ┆ -0.114606 ┆ 1440.324971 ┆ 0 ┆ A │\n",
+ "│ 15 ┆ 10.033849 ┆ 0.555832 ┆ 0.265698 ┆ 1.894409 ┆ -2266.65647 ┆ 1 ┆ A │\n",
+ "│ 18 ┆ 10.209092 ┆ 0.411413 ┆ 1.781309 ┆ 1.502181 ┆ 63.521515 ┆ 1 ┆ A │\n",
+ "│ 23 ┆ 5.461989 ┆ 0.631279 ┆ 2.069071 ┆ -0.735686 ┆ -1463.48885 ┆ 0 ┆ A │\n",
+ "│ 27 ┆ 8.902278 ┆ 0.732549 ┆ 2.877614 ┆ -0.049382 ┆ -1301.745218 ┆ 0 ┆ A │\n",
+ "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
+ "│ 99974 ┆ 6.541671 ┆ 0.052039 ┆ 2.622168 ┆ -0.451139 ┆ -1270.959364 ┆ 2 ┆ C │\n",
+ "│ 99976 ┆ 1.147642 ┆ 0.885223 ┆ 0.704189 ┆ 0.61381 ┆ 344.861659 ┆ 1 ┆ C │\n",
+ "│ 99985 ┆ 7.321052 ┆ 0.76491 ┆ 6.039978 ┆ -0.407754 ┆ 304.340042 ┆ 1 ┆ C │\n",
+ "│ 99991 ┆ 10.25385 ┆ 0.632615 ┆ 0.263188 ┆ -0.388282 ┆ 273.417495 ┆ 2 ┆ C │\n",
+ "│ 99995 ┆ 1.948428 ┆ 0.923293 ┆ 6.168104 ┆ -0.151161 ┆ 997.159514 ┆ 0 ┆ C │\n",
+ "└─────────┴───────────┴───────────┴──────────┴───────────┴──────────────┴───────┴──────────┘"
]
},
"execution_count": 5,
@@ -207,7 +207,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (3, 2)flags | len |
---|
i32 | u32 |
0 | 33281 |
1 | 33469 |
2 | 33250 |
"
+ "shape: (3, 2)flags | len |
---|
i32 | u32 |
0 | 33282 |
1 | 33654 |
2 | 33064 |
"
],
"text/plain": [
"shape: (3, 2)\n",
@@ -216,9 +216,9 @@
"│ --- ┆ --- │\n",
"│ i32 ┆ u32 │\n",
"╞═══════╪═══════╡\n",
- "│ 0 ┆ 33281 │\n",
- "│ 1 ┆ 33469 │\n",
- "│ 2 ┆ 33250 │\n",
+ "│ 0 ┆ 33282 │\n",
+ "│ 1 ┆ 33654 │\n",
+ "│ 2 ┆ 33064 │\n",
"└───────┴───────┘"
]
},
@@ -246,7 +246,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (3, 2)flags | len |
---|
i32 | u32 |
0 | 16641 |
1 | 33469 |
2 | 33250 |
"
+ "shape: (3, 2)flags | len |
---|
i32 | u32 |
0 | 16641 |
1 | 33654 |
2 | 33064 |
"
],
"text/plain": [
"shape: (3, 2)\n",
@@ -256,8 +256,8 @@
"│ i32 ┆ u32 │\n",
"╞═══════╪═══════╡\n",
"│ 0 ┆ 16641 │\n",
- "│ 1 ┆ 33469 │\n",
- "│ 2 ┆ 33250 │\n",
+ "│ 1 ┆ 33654 │\n",
+ "│ 2 ┆ 33064 │\n",
"└───────┴───────┘"
]
},
@@ -290,7 +290,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (3, 2)flags | len |
---|
i32 | u32 |
0 | 16641 |
1 | 10041 |
2 | 13300 |
"
+ "shape: (3, 2)flags | len |
---|
i32 | u32 |
0 | 16641 |
1 | 10097 |
2 | 13226 |
"
],
"text/plain": [
"shape: (3, 2)\n",
@@ -300,8 +300,8 @@
"│ i32 ┆ u32 │\n",
"╞═══════╪═══════╡\n",
"│ 0 ┆ 16641 │\n",
- "│ 1 ┆ 10041 │\n",
- "│ 2 ┆ 13300 │\n",
+ "│ 1 ┆ 10097 │\n",
+ "│ 2 ┆ 13226 │\n",
"└───────┴───────┘"
]
},
@@ -466,7 +466,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (3, 2)category | len |
---|
str | u32 |
"A" | 10000 |
"B" | 4249 |
"C" | 5751 |
"
+ "shape: (3, 2)category | len |
---|
str | u32 |
"A" | 10000 |
"B" | 4302 |
"C" | 5698 |
"
],
"text/plain": [
"shape: (3, 2)\n",
@@ -476,8 +476,8 @@
"│ str ┆ u32 │\n",
"╞══════════╪═══════╡\n",
"│ A ┆ 10000 │\n",
- "│ B ┆ 4249 │\n",
- "│ C ┆ 5751 │\n",
+ "│ B ┆ 4302 │\n",
+ "│ C ┆ 5698 │\n",
"└──────────┴───────┘"
]
},
@@ -511,7 +511,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (9, 3)category | flags | len |
---|
str | i32 | u32 |
"A" | 0 | 9996 |
"A" | 1 | 9996 |
"A" | 2 | 9996 |
"B" | 0 | 9970 |
"B" | 1 | 9970 |
"B" | 2 | 9970 |
"C" | 0 | 13251 |
"C" | 1 | 13251 |
"C" | 2 | 13251 |
"
+ "shape: (9, 3)category | flags | len |
---|
str | i32 | u32 |
"A" | 0 | 9865 |
"A" | 1 | 9865 |
"A" | 2 | 9865 |
"B" | 0 | 9909 |
"B" | 1 | 9909 |
"B" | 2 | 9909 |
"C" | 0 | 13224 |
"C" | 1 | 13224 |
"C" | 2 | 13224 |
"
],
"text/plain": [
"shape: (9, 3)\n",
@@ -520,15 +520,15 @@
"│ --- ┆ --- ┆ --- │\n",
"│ str ┆ i32 ┆ u32 │\n",
"╞══════════╪═══════╪═══════╡\n",
- "│ A ┆ 0 ┆ 9996 │\n",
- "│ A ┆ 1 ┆ 9996 │\n",
- "│ A ┆ 2 ┆ 9996 │\n",
- "│ B ┆ 0 ┆ 9970 │\n",
- "│ B ┆ 1 ┆ 9970 │\n",
- "│ B ┆ 2 ┆ 9970 │\n",
- "│ C ┆ 0 ┆ 13251 │\n",
- "│ C ┆ 1 ┆ 13251 │\n",
- "│ C ┆ 2 ┆ 13251 │\n",
+ "│ A ┆ 0 ┆ 9865 │\n",
+ "│ A ┆ 1 ┆ 9865 │\n",
+ "│ A ┆ 2 ┆ 9865 │\n",
+ "│ B ┆ 0 ┆ 9909 │\n",
+ "│ B ┆ 1 ┆ 9909 │\n",
+ "│ B ┆ 2 ┆ 9909 │\n",
+ "│ C ┆ 0 ┆ 13224 │\n",
+ "│ C ┆ 1 ┆ 13224 │\n",
+ "│ C ┆ 2 ┆ 13224 │\n",
"└──────────┴───────┴───────┘"
]
},
@@ -563,7 +563,7 @@
" white-space: pre-wrap;\n",
"}\n",
"\n",
- "shape: (9, 3)category | flags | len |
---|
str | i32 | u32 |
"A" | 0 | 9996 |
"A" | 1 | 9996 |
"A" | 2 | 9996 |
"B" | 0 | 9970 |
"B" | 1 | 9970 |
"B" | 2 | 9970 |
"C" | 0 | 10000 |
"C" | 1 | 10000 |
"C" | 2 | 10000 |
"
+ "shape: (9, 3)category | flags | len |
---|
str | i32 | u32 |
"A" | 0 | 9865 |
"A" | 1 | 9865 |
"A" | 2 | 9865 |
"B" | 0 | 9909 |
"B" | 1 | 9909 |
"B" | 2 | 9909 |
"C" | 0 | 10000 |
"C" | 1 | 10000 |
"C" | 2 | 10000 |
"
],
"text/plain": [
"shape: (9, 3)\n",
@@ -572,12 +572,12 @@
"│ --- ┆ --- ┆ --- │\n",
"│ str ┆ i32 ┆ u32 │\n",
"╞══════════╪═══════╪═══════╡\n",
- "│ A ┆ 0 ┆ 9996 │\n",
- "│ A ┆ 1 ┆ 9996 │\n",
- "│ A ┆ 2 ┆ 9996 │\n",
- "│ B ┆ 0 ┆ 9970 │\n",
- "│ B ┆ 1 ┆ 9970 │\n",
- "│ B ┆ 2 ┆ 9970 │\n",
+ "│ A ┆ 0 ┆ 9865 │\n",
+ "│ A ┆ 1 ┆ 9865 │\n",
+ "│ A ┆ 2 ┆ 9865 │\n",
+ "│ B ┆ 0 ┆ 9909 │\n",
+ "│ B ┆ 1 ┆ 9909 │\n",
+ "│ B ┆ 2 ┆ 9909 │\n",
"│ C ┆ 0 ┆ 10000 │\n",
"│ C ┆ 1 ┆ 10000 │\n",
"│ C ┆ 2 ┆ 10000 │\n",
diff --git a/mkdocs.yml b/mkdocs.yml
index e751aa33..190fd62c 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -6,6 +6,8 @@ use_directory_urls: false
nav:
- Home: index.md
- Diagnosis: dia.md
+- Pipeline: pipeline.md
+- Sample: sample.md
- Numerical Extension: num.md
- Stats Extension: stats.md
- String Extension: str2.md
diff --git a/python/polars_ds/sample.py b/python/polars_ds/sample.py
index ca44c815..469ee9f5 100644
--- a/python/polars_ds/sample.py
+++ b/python/polars_ds/sample.py
@@ -47,9 +47,7 @@ def volume_neutral(
seed: Optional[int] = None,
) -> pl.DataFrame:
"""
- Say we have a reference column, which is discrete. Let's say it has three distinct values, A,
- B, and C, with a, b, c being the value counts. It will randomly select min(a, b, c, target_volume)
- rows from each category, thus the name volume neutral.
+ Randomly selects the same number of rows (a "volume neutral" sample) from each segment in `by`, optionally within each level of the control column(s).
Parameters
----------
@@ -62,7 +60,7 @@ def volume_neutral(
Additional level(s). If not none, the volume neutral selection will happen at the
sublevel of the control column(s). See example.
target_volume
- If none, it will select min(a, b, c) rows, this means that one group is always fully selected.
+ If none, it will select min(a, b, c, ...) rows from each segment, where a, b, c, ... are the row counts of the distinct values in `by`.
seed
A random seed
"""