diff --git a/docs/index.html b/docs/index.html index d20764c..cc0b04e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -295,7 +295,6 @@
….
tech.ml.dataset is a great and fast library which brings columnar datasets to Clojure. Chris Nuernberger has been working on this library for the last year as a part of the bigger tech.ml
stack.
I started testing the library and helping to fix the bugs uncovered along the way. My main goal was to compare its functionality with the standards from other platforms. I focused on R solutions: dplyr, tidyr and data.table.
While converting the examples, I worked out how to reorganize the existing tech.ml.dataset
functions into a simple-to-use API. The main goals were:
take 2 (tc/rows ds)) (
time.LocalDate 0x1c5f3a8 "2012-01-01"]
+ ([#object[java.time.LocalDate 0x43abe9c4 "2012-01-01"]
([#object[java.0.0
12.8
5.0
4.7
"drizzle"]
- time.LocalDate 0x5805d953 "2012-01-02"]
+ [#object[java.time.LocalDate 0x7a714db8 "2012-01-02"]
[#object[java.10.9
10.6
2.8
@@ -4578,7 +4577,7 @@ Rename
v1
v2
[1 2 3]
-java.lang.Object@396269aa
+java.lang.Object@88b0370
@@ -4818,7 +4817,7 @@ Rename
v1
v2
[1 2 3]
-java.lang.Object@75a749d5
+java.lang.Object@23c1653c
@@ -4878,7 +4877,7 @@ Rename
v1
v2
[1 2 3]
-java.lang.Object@75a749d5
+java.lang.Object@23c1653c
@@ -5033,55 +5032,55 @@ Add or update
-0.86401689
+0.07257872
1
0.5
A
-0.14043806
+0.45689050
2
1.0
B
-0.68097777
+0.52503725
3
1.5
C
-0.91875674
+0.11928382
4
0.5
A
-0.24475970
+0.37822512
5
1.0
B
-0.06825930
+0.35239845
6
1.5
C
-0.04138129
+0.17997252
7
0.5
A
-0.87095448
+0.11440262
8
1.0
B
-0.73301107
+0.78541957
9
1.5
C
@@ -5972,7 +5971,7 @@ Update
1
-7
+9
0.5
A
@@ -5984,43 +5983,43 @@ Update
1
-5
+1
1.5
C
2
-9
+4
0.5
A
1
-6
+7
1.0
B
2
-3
+5
1.5
C
1
-8
+3
0.5
A
2
-4
+8
1.0
B
1
-1
+6
1.5
C
@@ -7508,10 +7507,10 @@ Other
-1
-9
-1.5
-C
+2
+2
+1.0
+B
@@ -7561,23 +7560,23 @@ Other
-2
-4
-0.5
-A
-
-
1
-1
-0.5
-A
+9
+1.5
+C
-
-1
-3
+
+2
+6
1.5
C
+
+2
+4
+0.5
+A
+
1
5
@@ -7592,27 +7591,27 @@ Other
1
-9
+3
1.5
C
-1
-7
+2
+4
0.5
A
-1
-9
-1.5
-C
+2
+4
+0.5
+A
-2
-2
-1.0
-B
+1
+1
+0.5
+A
@@ -7636,34 +7635,34 @@ Other
-2
-8
-1.0
-B
+1
+1
+0.5
+A
-2
-8
-1.0
-B
+1
+9
+1.5
+C
2
-2
-1.0
-B
-
-
-2
6
1.5
C
-
+
2
+4
+0.5
+A
+
+
2
-1.0
-B
+6
+1.5
+C
@@ -7694,27 +7693,27 @@ Other
2
-2
-1.0
-B
+6
+1.5
+C
-1
-1
-0.5
-A
-
-
2
4
0.5
A
+
+1
+9
+1.5
+C
+
-2
-8
-1.0
-B
+1
+1
+0.5
+A
@@ -7789,34 +7788,34 @@ Other
1
-7
+1
0.5
A
-2
-6
+1
+9
1.5
C
+2
+8
+1.0
+B
+
+
1
3
1.5
C
-
-1
+
1
+7
0.5
A
-
-2
-8
-1.0
-B
-
2
2
@@ -7824,23 +7823,23 @@ Other
B
-1
-5
-1.0
-B
-
-
2
4
0.5
A
-
-1
-9
+
+2
+6
1.5
C
+
+1
+5
+1.0
+B
+
@@ -8151,8 +8150,8 @@ Other
-2
-4
+1
+7
0.5
A
@@ -8163,14 +8162,14 @@ Other
A
-1
-1
+2
+4
0.5
A
-1
-7
+2
+4
0.5
A
@@ -8182,7 +8181,7 @@ Other
2
-2
+8
1.0
B
@@ -8193,8 +8192,8 @@ Other
B
-1
-5
+2
+8
1.0
B
@@ -8211,26 +8210,26 @@ Other
B
-1
-9
+2
+6
1.5
C
-1
-3
+2
+6
1.5
C
-1
-3
+2
+6
1.5
C
1
-3
+9
1.5
C
@@ -9790,16 +9789,16 @@ Strategies
-2
-4
+1
+1
0.5
A
-1
-9
-1.5
-C
+2
+4
+0.5
+A
@@ -13549,15 +13548,15 @@ Array column conve
:a
-[D@4f39fc33
+[D@30b08cf4
:b
-[D@26d55d8a
+[D@1e074828
:c
-[D@6168dfaf
+[D@5b5773c1
@@ -16551,8 +16550,8 @@ Longer
1
1
0
-0.94816679
-0.87288772
+0.30910644
+0.48917879
3
-2
@@ -16560,8 +16559,8 @@ Longer
2
1
1
-0.06125362
-0.11137475
+0.54225370
+0.27471082
3
-2
@@ -16569,8 +16568,8 @@ Longer
3
0
1
-0.90309774
-0.84951496
+0.90517086
+0.55928184
3
-2
@@ -16578,8 +16577,8 @@ Longer
4
0
1
-0.55768353
-0.29332929
+0.15731113
+0.44552143
3
-2
@@ -16610,7 +16609,7 @@ Longer
1
0
1
-0.94816679
+0.30910644
3
@@ -16618,7 +16617,7 @@ Longer
1
1
1
-0.06125362
+0.54225370
3
@@ -16626,7 +16625,7 @@ Longer
0
1
1
-0.90309774
+0.90517086
3
@@ -16634,7 +16633,7 @@ Longer
0
1
1
-0.55768353
+0.15731113
3
@@ -16642,7 +16641,7 @@ Longer
1
0
2
-0.87288772
+0.48917879
-2
@@ -16650,7 +16649,7 @@ Longer
1
1
2
-0.11137475
+0.27471082
-2
@@ -16658,7 +16657,7 @@ Longer
0
1
2
-0.84951496
+0.55928184
-2
@@ -16666,7 +16665,7 @@ Longer
0
1
2
-0.29332929
+0.44552143
-2
@@ -25447,25 +25446,13 @@ Concat
1
-1
-0.5
-A
-
-
-2
-8
+5
1.0
B
-
-2
-6
-1.5
-C
-
-2
-6
+1
+3
1.5
C
@@ -25476,16 +25463,16 @@ Concat
B
-1
-7
+2
+4
0.5
A
-1
-5
-1.0
-B
+2
+6
+1.5
+C
2
@@ -25494,17 +25481,29 @@ Concat
A
-2
-8
+1
+5
1.0
B
1
-9
+7
+0.5
+A
+
+
+2
+6
1.5
C
+
+1
+1
+0.5
+A
+
…
…
@@ -25513,39 +25512,27 @@ Concat
1
-1
-0.5
-A
+5
+1.0
+B
1
-1
-0.5
-A
-
-
-1
9
1.5
C
-
-2
-2
-1.0
-B
-
-2
-8
-1.0
-B
+1
+1
+0.5
+A
-2
-6
-1.5
-C
+1
+7
+0.5
+A
1
@@ -25555,16 +25542,28 @@ Concat
1
-9
-1.5
-C
+7
+0.5
+A
+1
+1
+0.5
+A
+
+
2
2
1.0
B
+
+1
+7
+0.5
+A
+
1
5
@@ -25786,53 +25785,53 @@ Union
A
-1
-7
-0.5
-A
-
-
2
2
1.0
B
-
+
1
-9
+3
+1.5
+C
+
+
+2
+6
1.5
C
+1
+7
+0.5
+A
+
+
2
8
1.0
B
-
-2
-4
-0.5
-A
-
1
-3
+9
1.5
C
-2
-6
-1.5
-C
-
-
1
5
1.0
B
+
+2
+4
+0.5
+A
+
_unnamed, (splitted) [38 5]:
+_unnamed, (splitted) [34 5]:
13 | +12 | :a | :g1 | :train | 0 | |||
5 | +17 | :a | -:g2 | +:g3 | :train | 0 | ||
23 | -:b | -:g3 | +12 | +:a | +:g1 | :train | 0 | |
2 | +5 | :a | -:g1 | +:g3 | :train | 0 | ||
5 | +19 | :a | :g2 | :train | 0 | |||
3 | -:a | +21 | +:b | :g3 | :train | 0 | ||
1 | +11 | :a | -:g3 | +:g2 | :train | 0 | ||
1 | +17 | :a | :g3 | :train | 0 | |||
21 | +20 | :b | -:g1 | +:g3 | :train | 0 | ||
11 | +5 | :a | :g3 | :train | @@ -27063,79 +27062,79 @@… | |||
6 | +18 | :a | -:g3 | -:test | +:g1 | +:train | 0 | |
8 | -:a | -:g2 | -:test | +23 | +:b | +:g3 | +:train | 0 |
10 | +0 | :a | -:g1 | +:g2 | :test | 0 | ||
14 | +2 | :a | :g1 | :test | 0 | |||
15 | +3 | :a | -:g3 | +:g1 | :test | 0 | ||
16 | +4 | :a | -:g3 | +:g2 | :test | 0 | ||
17 | +6 | :a | -:g3 | +:g2 | :test | 0 | ||
18 | +8 | :a | -:g3 | +:g2 | :test | 0 | ||
19 | +13 | :a | -:g2 | +:g3 | :test | 0 | ||
22 | -:b | +14 | +:a | :g2 | :test | 0 | ||
24 | +22 | :b | -:g2 | +:g3 | :test | 0 | ||
1 | -:a | +22 | +:b | :g3 | :train | 0 | ||
21 | -:b | -:g1 | +14 | +:a | +:g2 | :train | 0 | |
17 | +19 | :a | -:g3 | +:g2 | :train | 0 | ||
19 | +8 | :a | :g2 | :train | 0 | |||
20 | -:b | +6 | +:a | :g2 | :train | 0 | ||
11 | +18 | :a | -:g3 | +:g1 | :test | 0 | ||
7 | +11 | :a | -:g1 | +:g2 | :test | 0 | ||
14 | -:a | -:g1 | +24 | +:b | +:g3 | :test | 0 | |
22 | -:b | +15 | +:a | :g2 | :test | 0 | ||
8 | +2 | :a | -:g2 | +:g1 | :test | 0 | … | |
6 | +13 | :a | :g3 | :test | 0 | |||
9 | -:a | -:g2 | +23 | +:b | +:g3 | :test | 0 | |
24 | -:b | -:g2 | +10 | +:a | +:g1 | :test | 0 | |
10 | +9 | :a | :g1 | :test | @@ -27283,49 +27282,49 @@||||
12 | :a | -:g2 | +:g1 | :test | 0 | |||
5 | +0 | :a | :g2 | :test | 0 | |||
3 | +5 | :a | :g3 | :test | 0 | |||
4 | +7 | :a | -:g1 | +:g3 | :test | 0 | ||
13 | -:a | -:g1 | +20 | +:b | +:g3 | :test | 0 | |
16 | -:a | +21 | +:b | :g3 | :test | 0 | ||
2 | +1 | :a | -:g1 | +:g3 | :test | 0 | ||
16 | +11 | :a | -:g3 | +:g2 | :train | 0 | ||
8 | +9 | :a | -:g2 | +:g1 | :train | 0 | ||
17 | +16 | :a | -:g3 | +:g2 | :test | 0 | ||
18 | -:a | +23 | +:b | :g3 | :test | 0 | ||
22 | -:b | -:g2 | +3 | +:a | +:g1 | :test | 0 | |
5 | +10 | :a | -:g2 | +:g1 | :test | 0 | ||
1 | +7 | :a | :g3 | :test | 0 | |||
23 | -:b | -:g3 | +15 | +:a | +:g2 | :split-2 | 0 | |
14 | +1 | :a | -:g1 | +:g3 | :split-2 | 0 | ||
4 | +13 | :a | -:g1 | +:g3 | :split-2 | 0 | … | |
7 | +4 | :a | -:g1 | +:g2 | :split-3 | 0 | ||
10 | +5 | :a | -:g1 | +:g3 | :split-3 | 0 | ||
2 | +8 | :a | -:g1 | +:g2 | :split-3 | 0 | ||
6 | -:a | +24 | +:b | :g3 | :split-4 | 0 | ||
9 | -:a | -:g2 | +21 | +:b | +:g3 | :split-4 | 0 | |
15 | +19 | :a | -:g3 | +:g2 | :split-4 | 0 | ||
19 | +17 | :a | -:g2 | +:g3 | :split-4 | 0 | ||
20 | -:b | -:g2 | +12 | +:a | +:g1 | :split-4 | 0 | |
13 | +14 | :a | -:g1 | +:g2 | :split-4 | 0 | ||
21 | -:b | +18 | +:a | :g1 | :split-4 | 0 | ||
24 | +20 | :b | -:g2 | +:g3 | :split-4 | 0 | ||
16 | +3 | :a | -:g3 | +:g1 | small | 0 | ||
2 | +15 | :a | -:g1 | +:g2 | small | 0 | ||
0 | +19 | :a | -:g1 | +:g2 | small | 0 | ||
13 | +16 | :a | -:g1 | +:g2 | small | 0 | ||
20 | -:b | +11 | +:a | :g2 | small | 0 | ||
23 | -:b | +13 | +:a | :g3 | smaller | 0 | ||
1 | -:a | +22 | +:b | :g3 | smaller | 0 | ||
21 | -:b | +2 | +:a | :g1 | smaller | 0 | ||
9 | +1 | :a | -:g2 | +:g3 | big | 0 | ||
17 | +12 | :a | -:g3 | +:g1 | big | 0 | … | |
7 | -:a | -:g1 | +21 | +:b | +:g3 | big | 0 | |
4 | -:a | -:g1 | +24 | +:b | +:g3 | big | 0 | |
14 | +17 | :a | -:g1 | +:g3 | big | 0 | ||
19 | +8 | :a | :g2 | big | 0 | |||
24 | -:b | +6 | +:a | :g2 | big | 0 | ||
10 | +14 | :a | -:g1 | +:g2 | big | 0 | ||
3 | +18 | :a | -:g3 | +:g1 | big | 0 | ||
18 | +9 | :a | -:g3 | +:g1 | the rest | 0 | ||
22 | -:b | +0 | +:a | :g2 | the rest | 0 | ||
12 | +4 | :a | :g2 | the rest | 0 | |||
6 | -:a | +23 | +:b | :g3 | the rest | 0 | @@ -27845,212 +27844,212 @@||
3 | +14 | :a | -:g3 | +:g2 | :train | 0 | ||
12 | +10 | :a | -:g2 | +:g1 | :train | 0 | ||
13 | +9 | :a | :g1 | :train | 0 | |||
8 | +15 | :a | :g2 | :train | 0 | |||
17 | +11 | :a | -:g3 | +:g2 | :train | 0 | ||
22 | -:b | -:g2 | +12 | +:a | +:g1 | :train | 0 | |
19 | +5 | :a | -:g2 | +:g3 | :train | 0 | ||
18 | -:a | +20 | +:b | :g3 | :train | 0 | ||
23 | -:b | -:g3 | +19 | +:a | +:g2 | :train | 0 | |
24 | -:b | +6 | +:a | :g2 | :train | 0 | ||
14 | +2 | :a | :g1 | :train | 0 | |||
1 | +16 | :a | -:g3 | +:g2 | :train | 0 | ||
16 | -:a | +22 | +:b | :g3 | :train | 0 | ||
4 | +8 | :a | -:g1 | +:g2 | :train | 0 | ||
9 | +13 | :a | -:g2 | +:g3 | :train | 0 | ||
21 | -:b | -:g1 | +17 | +:a | +:g3 | :train | 0 | |
5 | +0 | :a | :g2 | :train | 0 | |||
15 | +4 | :a | -:g3 | +:g2 | :train | 0 | ||
20 | +23 | :b | -:g2 | +:g3 | :train | 0 | ||
11 | -:a | +21 | +:b | :g3 | :train | 0 | ||
6 | +3 | :a | -:g3 | +:g1 | :train | 0 | ||
10 | +1 | :a | -:g1 | +:g3 | :train | 0 | ||
7 | +18 | :a | :g1 | :train | 0 | |||
2 | -:a | -:g1 | +24 | +:b | +:g3 | :train | 0 | |
0 | +7 | :a | -:g1 | +:g3 | :test | 0 | ||
0 | +7 | :a | -:g1 | +:g3 | :train | 1 | ||
12 | +10 | :a | -:g2 | +:g1 | :train | 1 | ||
13 | +9 | :a | :g1 | :train | 1 | |||
8 | +15 | :a | :g2 | :train | 1 | |||
17 | +11 | :a | -:g3 | +:g2 | :train | 1 | ||
:g1 | +:g2 | 0 | -Group: :g1, (splitted) [9 5]: | +Group: :g2, (splitted) [9 5]: | ||||
:g3 | 1 | -Group: :g3, (splitted) [10 5]: | +Group: :g3, (splitted) [12 5]: | |||||
:g2 | +:g1 | 2 | -Group: :g2, (splitted) [9 5]: | +Group: :g1, (splitted) [8 5]: |
[
:g1
+[
:g2
(
{
@@ -28320,7 +28319,7 @@ Split as a sequence
-Group: 0 [7 3]:
+Group: 0 [8 3]:
@@ -28331,39 +28330,44 @@ Split as a sequence
-14
+15
:a
-:g1
+:g2
0
:a
-:g1
+:g2
-4
+11
:a
-:g1
+:g2
-14
+19
:a
-:g1
+:g2
-7
+4
:a
-:g1
+:g2
-13
+8
:a
-:g1
+:g2
-21
-:b
-:g1
+16
+:a
+:g2
+
+
+14
+:a
+:g2
@@ -28384,7 +28388,7 @@ Split as a sequence
-Group: 0 [2 3]:
+Group: 0 [1 3]:
@@ -28395,14 +28399,9 @@ Split as a sequence
-2
-:a
-:g1
-
-
-10
+6
:a
-:g1
+:g2
@@ -28424,7 +28423,7 @@ Split as a sequence
-Group: 1 [7 3]:
+Group: 1 [8 3]:
@@ -28435,39 +28434,44 @@ Split as a sequence
-0
+8
:a
-:g1
+:g2
-14
+19
:a
-:g1
+:g2
-13
+14
:a
-:g1
+:g2
-7
+0
:a
-:g1
+:g2
-4
+6
:a
-:g1
+:g2
-14
+19
:a
-:g1
+:g2
-21
-:b
-:g1
+4
+:a
+:g2
+
+
+15
+:a
+:g2
@@ -28499,14 +28503,14 @@ Split as a sequence
-2
+11
:a
-:g1
+:g2
-10
+16
:a
-:g1
+:g2
@@ -33144,19 +33148,19 @@
1
-7
-0.5
-A
+5
+1.0
+B
2
-8
+2
1.0
B
-2
-6
+1
+9
1.5
C
@@ -33182,31 +33186,31 @@
1
-7
-0.5
-A
-
-
-1
3
1.5
C
+
+2
+8
+1.0
+B
+
1
-9
-1.5
-C
+5
+1.0
+B
1
-9
-1.5
-C
+7
+0.5
+A
-2
-6
+1
+3
1.5
C
diff --git a/notebooks/index.clj b/notebooks/index.clj
index f4e2cdd..fc05749 100644
--- a/notebooks/index.clj
+++ b/notebooks/index.clj
@@ -27,8 +27,6 @@ tablecloth-version
## Introduction
-....
-
[tech.ml.dataset](https://github.com/techascent/tech.ml.dataset) is a great and fast library which brings columnar datasets to Clojure. Chris Nuernberger has been working on this library for the last year as a part of the bigger `tech.ml` stack.
I started testing the library and helping to fix the bugs uncovered along the way. My main goal was to compare its functionality with the standards from other platforms. I focused on R solutions: [dplyr](https://dplyr.tidyverse.org/), [tidyr](https://tidyr.tidyverse.org/) and [data.table](https://rdatatable.gitlab.io/data.table/).