-
Notifications
You must be signed in to change notification settings - Fork 0
/
subtotal.js
214 lines (203 loc) · 8.99 KB
/
subtotal.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
export const CHILDREN = Symbol("CHILDREN");
export const DESCENDANT_COUNT = Symbol("DESCENDANT_COUNT");
export const GROUP = Symbol("GROUP");
export const IMPACT = Symbol("IMPACT");
export const INDEX = Symbol("INDEX");
export const LEVEL = Symbol("LEVEL");
export const PARENT = Symbol("PARENT");
export const RANK = Symbol("RANK");
export const SURPRISE = Symbol("SURPRISE");
const VISIT_ORDER = Symbol("VISIT_ORDER");
const IMPACT_RANK = Symbol("IMPACT_RANK");
/**
* @ignore
* Calculates a hierarchical tree structure based on the provided data and configuration.
*
* @function
* @param {Object} options - Configuration for the tree calculation.
* @param {Object[]} options.data - An array of objects representing the data rows.
* @param {(string[]|Object)} options.groups - ['col', ...] or {col: row => ...} defines the hierarchy levels of the tree.
* @param {(string[]|Object)} options.metrics - ['col', ...] or {col: 'sum', col: d => ...} specifies the metrics to aggregate.
* @param {string|Object} [options.sort] - '+col', '-col', or {col1: '-col2', ...} defines the sorting criteria for each level.
* @param {string|function} [options.impact] - '+col', '-col', or d => ... specifies the impact metric to rank insights by.
* @param {string|function} [options.rankBy] - '+col', '-col', or d => ... specifies the metric to traverse insights by.
* @param {string} [options.totalGroup="Total"] - Name of the total row's `GROUP`.
*
* @returns {Object[]} - Returns an array of objects representing the calculated tree structure.
*
* @throws {Error} Throws an error for invalid `groups`, `metrics`, `sort`, or `impact` configurations.
*/
export function subtotal({
data,
groups,
metrics,
sort,
impact,
rankBy = (v) => v[IMPACT] * v[SURPRISE],
totalGroup = "Total",
}) {
if (typeof groups !== "object" || !groups) throw new Error(`Invalid groups: ${groups}`);
// Convert array of strings [x, y, z] into object {x: x, y: y, z: z}
if (Array.isArray(groups)) groups = Object.fromEntries(groups.map((col) => [col, col]));
const groupNames = Object.keys(groups);
// Convert string values into accessors
const groupValues = Object.values(groups).map((col) => (typeof col === "function" ? col : (d) => d[col]));
// Check if metrics is a valid object or array, and convert it to the desired format
if (typeof metrics != "object" || metrics === null)
throw new Error(`metrics must be ['col', ...] or {col: 'sum', col: d => ...}, not ${metrics}`);
if (Array.isArray(metrics)) metrics = metrics.map((key) => [key, (d) => agg.sum(key, d)]);
else
metrics = Object.entries(metrics).map(([key, value]) => [
key,
typeof value === "function" ? value : (d) => agg[value](key, d),
]);
// Check if sort is a valid object, string, or undefined, and convert it to the desired format
if ((typeof sort != "object" && typeof sort != "string" && sort !== undefined) || sort === null)
throw new Error("sort must be '+col', '-col', or {col1: '-col2', ...}");
// Convert sorts values into a function that sorts the data by the given column
const sorts = groupNames
.map((col) => (typeof sort === "string" || sort === undefined ? sort : sort[col]))
.map((order) =>
typeof order == "string"
? order[0] == "+"
? sorters.ascending(order.slice(1))
: order[0] == "-"
? sorters.descending(order.slice(1))
: sorters.ascending(order)
: order
)
.map((order) => (order ? (a, b) => order(a.metrics, b.metrics) : order));
// Convert impact to a function that returns a number that's sortable
impact = sortableToFunction(impact);
rankBy = sortableToFunction(rankBy);
// Return { metric: fn(data) } for based on each metric's function
function reduce(data, context) {
const result = Object.assign({}, context);
for (const [key, fn] of metrics) result[key] = fn(data, result);
return result;
}
// Calculate the hierarchical tree structure based on the provided data and configuration
const tree = nest(data, groupNames, groupValues, reduce, { [GROUP]: totalGroup });
// Flatten the tree structure into an array, applying sorting rules to each level
const result = flatten(tree, sorts);
// Calculate the impact metric for each element in the result array and sort the array based on the impact metric
result
.map((v) => [impact(v), v])
.sort((a, b) => (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0))
.forEach(([val, v], i) => (v[IMPACT_RANK] = i + 1) && (v[IMPACT] = val));
// Traverse the tree in a rank-based order to assign VISIT_ORDER to each element
traverseTree([tree]);
// Calculate the minimum and maximum impact values from the result array
const [minImpact, maxImpact] = [Math.min(...result.map((v) => v[IMPACT])), Math.max(...result.map((v) => v[IMPACT]))];
// Normalize the impact and surprise metrics for each element in the result array
result.forEach((v) => {
v[IMPACT] = (v[IMPACT] - minImpact) / (maxImpact - minImpact);
v[SURPRISE] = v[VISIT_ORDER] / result.length;
});
// Calculate the rank metric for each element in the result array and sort the array based on the rank metric
result
.map((v) => [rankBy(v), v])
.sort((a, b) => b[0] - a[0])
.forEach(([, v], i) => (v[RANK] = i + 1));
// Assign the index value to each element in the result array
result.forEach((v, i) => (v[INDEX] = i));
// Return the final result array
return result;
}
// d3-array rollup does not support subtotals.
// https://github.com/d3/d3-array/blob/main/src/group.js
// Revise it to support subtotals at every level. Instead of:
// tree = {label: {label: ...}
// ... it returns:
// tree = {label: {metrics: {}, children: {label: ...}}
function nest(data, groupNames, groupValues, reduce, context = {}) {
return (function regroup(data, i) {
context[LEVEL] = i;
const metrics = reduce(data, context);
if (i >= groupValues.length) return { metrics };
const children = new Map();
const groupName = groupNames[i];
const keyof = groupValues[i++];
let index = -1;
for (const value of data) {
const key = keyof(value, ++index, data);
const child = children.get(key);
if (child) child.push(value);
else children.set(key, [value]);
}
for (const [key, data] of children) {
context[groupName] = context[GROUP] = key;
children.set(key, regroup(data, i));
delete context[groupName];
}
return { metrics, children: children };
})(data, 0);
}
/**
* @ignore
* Flattens a tree structure into an array, while applying sorting rules to each level.
* @param {Object} tree - The tree structure to flatten.
* @param {Array<Function>} sorts - An array of sorting functions to apply at each level of the tree.
* @returns {Array} - The flattened array.
*/
function flatten(tree, sorts) {
const result = [tree.metrics];
const childTrees = [...(tree.children || [])].map(([, v]) => v);
const level = tree.metrics[LEVEL];
// Each node gets CHILDREN (list of direct children) and DESCENDANT_COUNT
tree.metrics[CHILDREN] = childTrees;
// DESCENDANT_COUNT = size of added flattened tree = newSize - currentSize
// Here, we set DESCENDANT_COUNT to -currentSize and later add +newSize
tree.metrics[DESCENDANT_COUNT] = -result.length;
// Sort the subtree based on the level-specific sort before flattening
if (sorts[level]) childTrees.sort(sorts[level]);
// Append the flattened subtree
for (const subtree of childTrees) {
subtree.metrics[PARENT] = tree.metrics;
result.push(...flatten(subtree, sorts));
}
// Update DESCENDANT_COUNT to the size
tree.metrics[DESCENDANT_COUNT] += result.length;
return result;
}
// Convert sortable column to a sorting function
function sortableToFunction(column, name) {
if (column === undefined) return () => 0;
else if (typeof column === "string") {
if (column[0] === "-") {
const col = column.slice(1);
return (d) => -d[col];
} else if (column[0] === "+") {
const col = column.slice(1);
return (d) => d[col];
} else {
return (d) => d[column];
}
} else if (typeof column === "function") {
return column;
} else {
throw new Error(`${name} must be a '+col', '-col', d => ..., not ${column}`);
}
}
// Add visit order via rank-based traversal
function traverseTree(pendingList, visitOrder = 0) {
while (pendingList.length) {
const node = pendingList.shift();
node.metrics[VISIT_ORDER] = visitOrder++;
if (node.children) {
pendingList = pendingList.concat(Array.from(node.children.values()));
pendingList.sort((a, b) => a.metrics[IMPACT_RANK] - b.metrics[IMPACT_RANK]);
}
}
}
export const agg = {
sum: (key, v) => v.reduce((a, v) => +v[key] + a, 0),
count: (key, v) => v.length,
avg: (key, v) => v.reduce((a, v) => +v[key] + a, 0) / v.length,
min: (key, v) => Math.min(...v.map((d) => +d[key])),
max: (key, v) => Math.max(...v.map((d) => +d[key])),
};
const sorters = {
ascending: (key) => (a, b) => (a[key] < b[key] ? -1 : a[key] > b[key] ? 1 : 0),
descending: (key) => (a, b) => (a[key] > b[key] ? -1 : a[key] < b[key] ? 1 : 0),
};