-
Notifications
You must be signed in to change notification settings - Fork 326
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
`aggregate ..Sum` of integer column remains integer and handles overflow (#11860)
- This was mentioned in #7192 but didn't get a proper ticket.
- Ensuring that summing integers gives an integer and not a float.
- Only in-memory, as in Database the result type is database-dependent and we want it to be like that.
- Also allowing the integer sum to overflow and become a `BigInteger`, in that case the resulting column will become `Decimal`.
- Loading branch information
Showing
23 changed files
with
445 additions
and
93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
32 changes: 32 additions & 0 deletions
32
std-bits/table/src/main/java/org/enso/table/aggregations/KnownTypeAggregator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import org.enso.table.data.column.builder.Builder; | ||
import org.enso.table.data.column.storage.type.StorageType; | ||
import org.enso.table.problems.ProblemAggregator; | ||
|
||
/** | ||
* A common subclass for aggregators that know their result type on construction and use a | ||
* standard builder obtained from {@code Builder.getForType} for that type. | ||
*/ | ||
public abstract class KnownTypeAggregator extends Aggregator { | ||
// Storage type given to the constructor; used by makeBuilder and returned by getType. | ||
private final StorageType type; | ||
|
||
/** | ||
* Creates the aggregator and remembers the given storage type. | ||
* | ||
* @param name forwarded unchanged to the {@code Aggregator} superclass constructor | ||
* @param type the storage type later used to create builders and returned by getType | ||
*/ | ||
protected KnownTypeAggregator(String name, StorageType type) { | ||
super(name); | ||
this.type = type; | ||
} | ||
|
||
/** | ||
* Creates a builder by delegating to {@code Builder.getForType} with the stored type. | ||
* | ||
* @param size forwarded unchanged to {@code Builder.getForType} | ||
* @param problemAggregator forwarded unchanged to {@code Builder.getForType} | ||
* @return the builder returned by {@code Builder.getForType} | ||
*/ | ||
@Override | ||
public Builder makeBuilder(int size, ProblemAggregator problemAggregator) { | ||
return Builder.getForType(type, size, problemAggregator); | ||
} | ||
|
||
/** | ||
* Returns the storage type that was supplied when this aggregator was constructed. | ||
* | ||
* @return The type of the new column. | ||
*/ | ||
public StorageType getType() { | ||
return type; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
156 changes: 127 additions & 29 deletions
156
std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,158 @@ | ||
package org.enso.table.aggregations; | ||
|
||
import java.math.BigDecimal; | ||
import java.math.MathContext; | ||
import java.util.List; | ||
import org.enso.base.polyglot.NumericConverter; | ||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.column.storage.numeric.AbstractLongStorage; | ||
import org.enso.table.data.column.storage.numeric.DoubleStorage; | ||
import org.enso.table.data.column.storage.type.AnyObjectType; | ||
import org.enso.table.data.column.storage.type.BigDecimalType; | ||
import org.enso.table.data.column.storage.type.BigIntegerType; | ||
import org.enso.table.data.column.storage.type.FloatType; | ||
import org.enso.table.data.column.storage.type.IntegerType; | ||
import org.enso.table.data.column.storage.type.StorageType; | ||
import org.enso.table.data.table.Column; | ||
import org.enso.table.data.table.problems.InvalidAggregation; | ||
import org.enso.table.problems.ColumnAggregatedProblemAggregator; | ||
import org.enso.table.problems.ProblemAggregator; | ||
import org.graalvm.polyglot.Context; | ||
|
||
/** Aggregate Column computing the mean value in a group. */ | ||
public class Mean extends Aggregator { | ||
private static class Calculation { | ||
public long count; | ||
public double total; | ||
|
||
public Calculation(double value) { | ||
count = 1; | ||
total = value; | ||
} | ||
} | ||
|
||
public class Mean extends KnownTypeAggregator { | ||
private final Storage<?> storage; | ||
private final String columnName; | ||
|
||
public Mean(String name, Column column) { | ||
super(name, FloatType.FLOAT_64); | ||
super(name, resultTypeFromInput(column.getStorage())); | ||
this.storage = column.getStorage(); | ||
this.columnName = column.getName(); | ||
} | ||
|
||
/** | ||
* Chooses the result type of the mean from the type of the input storage. | ||
* | ||
* <p>Float and integer inputs map to a 64-bit float result; big-integer and big-decimal inputs | ||
* map to a big-decimal result. | ||
* | ||
* @param inputStorage the storage of the column being aggregated | ||
* @return {@code FloatType.FLOAT_64} or {@code BigDecimalType.INSTANCE} | ||
* @throws IllegalStateException if the (possibly inferred) input type is none of the four | ||
* handled types | ||
*/ | ||
private static StorageType resultTypeFromInput(Storage<?> inputStorage) { | ||
StorageType inputType = inputStorage.getType(); | ||
// A storage declared as AnyObjectType is asked for a more precise type before dispatching. | ||
if (inputType instanceof AnyObjectType) { | ||
inputType = inputStorage.inferPreciseType(); | ||
} | ||
|
||
return switch (inputType) { | ||
case FloatType floatType -> FloatType.FLOAT_64; | ||
case IntegerType integerType -> FloatType.FLOAT_64; | ||
case BigIntegerType bigIntegerType -> BigDecimalType.INSTANCE; | ||
case BigDecimalType bigDecimalType -> BigDecimalType.INSTANCE; | ||
// NOTE(review): if inferPreciseType can still yield AnyObjectType (e.g. mixed values), | ||
// this throws from the constructor - confirm that is the intended behaviour. | ||
default -> throw new IllegalStateException( | ||
"Unexpected input type for Mean aggregate: " + inputType); | ||
}; | ||
} | ||
|
||
@Override | ||
public Object aggregate(List<Integer> indexes, ProblemAggregator problemAggregator) { | ||
ColumnAggregatedProblemAggregator innerAggregator = | ||
new ColumnAggregatedProblemAggregator(problemAggregator); | ||
Context context = Context.getCurrent(); | ||
Calculation current = null; | ||
for (int row : indexes) { | ||
Object value = storage.getItemBoxed(row); | ||
if (value != null) { | ||
Double dValue = NumericConverter.tryConvertingToDouble(value); | ||
if (dValue == null) { | ||
innerAggregator.reportColumnAggregatedProblem( | ||
new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); | ||
return null; | ||
MeanAccumulator accumulator = makeAccumulator(); | ||
accumulator.accumulate(indexes, storage, innerAggregator); | ||
return accumulator.summarize(); | ||
} | ||
|
||
/** | ||
* Creates a fresh accumulator matching the result type returned by {@code getType()}. | ||
* | ||
* @return a {@code FloatMeanAccumulator} for a float result type, or a {@code | ||
*     BigDecimalMeanAccumulator} for a big-decimal result type | ||
* @throws IllegalStateException if the result type is neither of the two | ||
*/ | ||
private MeanAccumulator makeAccumulator() { | ||
return switch (getType()) { | ||
case FloatType floatType -> new FloatMeanAccumulator(); | ||
case BigDecimalType bigDecimalType -> new BigDecimalMeanAccumulator(); | ||
default -> throw new IllegalStateException( | ||
"Unexpected output type in Mean aggregate: " + getType()); | ||
}; | ||
} | ||
|
||
/** | ||
* Two-step contract for computing a mean: first feed rows in with {@code accumulate}, then | ||
* read the result with {@code summarize}. | ||
*/ | ||
private abstract static class MeanAccumulator { | ||
/** | ||
* Processes the rows of {@code storage} selected by {@code indexes}. | ||
* | ||
* @param indexes row indexes to include | ||
* @param storage the storage the rows are read from | ||
* @param problemAggregator receives problems found while reading values | ||
*/ | ||
abstract void accumulate( | ||
List<Integer> indexes, Storage<?> storage, ProblemAggregator problemAggregator); | ||
|
||
/** | ||
* Returns the result of the accumulation. The implementations in this file return {@code | ||
* null} when no value was counted and the mean otherwise. | ||
*/ | ||
abstract Object summarize(); | ||
} | ||
|
||
/** | ||
* Accumulates a running {@code double} total and a count of non-missing values, and reports | ||
* their quotient as the mean. | ||
* | ||
* <p>Declared as an inner (non-static) class: the fallback branch reads {@code columnName} | ||
* from the enclosing instance. | ||
*/ | ||
private final class FloatMeanAccumulator extends MeanAccumulator { | ||
private double total = 0; | ||
private long count = 0; | ||
|
||
/** | ||
* Adds every non-missing value at the given row indexes to the running total. | ||
* | ||
* <p>{@code DoubleStorage} and {@code AbstractLongStorage} are read through their own | ||
* accessors; any other storage is read boxed and converted with {@code | ||
* NumericConverter.tryConvertingToDouble}. Values that fail the conversion are reported as | ||
* an {@code InvalidAggregation} and left out of both total and count. | ||
*/ | ||
@Override | ||
void accumulate( | ||
List<Integer> indexes, Storage<?> storage, ProblemAggregator problemAggregator) { | ||
Context context = Context.getCurrent(); | ||
if (storage instanceof DoubleStorage doubleStorage) { | ||
for (int i : indexes) { | ||
if (!doubleStorage.isNothing(i)) { | ||
total += doubleStorage.getItemAsDouble(i); | ||
count++; | ||
} | ||
context.safepoint(); | ||
} | ||
} else if (storage instanceof AbstractLongStorage longStorage) { | ||
for (int i : indexes) { | ||
if (!longStorage.isNothing(i)) { | ||
// Added into the double total; presumably getItem returns a long - TODO confirm. | ||
total += longStorage.getItem(i); | ||
count++; | ||
} | ||
context.safepoint(); | ||
} | ||
} else { | ||
// NOTE(review): the aggregate method in this diff appears to pass in an aggregator that | ||
// is already a ColumnAggregatedProblemAggregator - confirm the extra wrapping is intended. | ||
ColumnAggregatedProblemAggregator innerAggregator = | ||
new ColumnAggregatedProblemAggregator(problemAggregator); | ||
for (int i : indexes) { | ||
Object value = storage.getItemBoxed(i); | ||
if (value != null) { | ||
Double dValue = NumericConverter.tryConvertingToDouble(value); | ||
if (dValue == null) { | ||
innerAggregator.reportColumnAggregatedProblem( | ||
new InvalidAggregation(columnName, i, "Cannot convert to a Float.")); | ||
// NOTE(review): this continue also skips context.safepoint() below for this row. | ||
continue; | ||
} | ||
|
||
||
total += dValue; | ||
count++; | ||
} | ||
context.safepoint(); | ||
} | ||
} | ||
} | ||
|
||
||
/** Returns {@code null} when no value was counted, otherwise {@code total / count}. */ | ||
@Override | ||
Object summarize() { | ||
return count == 0 ? null : total / count; | ||
} | ||
} | ||
|
||
private final class BigDecimalMeanAccumulator extends MeanAccumulator { | ||
private BigDecimal total = BigDecimal.ZERO; | ||
private long count = 0; | ||
|
||
if (current == null) { | ||
current = new Calculation(dValue); | ||
} else { | ||
current.count++; | ||
current.total += dValue; | ||
@Override | ||
void accumulate( | ||
List<Integer> indexes, Storage<?> storage, ProblemAggregator problemAggregator) { | ||
ColumnAggregatedProblemAggregator innerAggregator = | ||
new ColumnAggregatedProblemAggregator(problemAggregator); | ||
Context context = Context.getCurrent(); | ||
for (int i : indexes) { | ||
Object value = storage.getItemBoxed(i); | ||
if (value != null) { | ||
try { | ||
BigDecimal valueAsBigDecimal = NumericConverter.coerceToBigDecimal(value); | ||
total = total.add(valueAsBigDecimal); | ||
count++; | ||
} catch (UnsupportedOperationException error) { | ||
innerAggregator.reportColumnAggregatedProblem( | ||
new InvalidAggregation( | ||
columnName, i, "Cannot convert to a BigDecimal: " + error.getMessage())); | ||
continue; | ||
} | ||
} | ||
context.safepoint(); | ||
} | ||
} | ||
|
||
context.safepoint(); | ||
@Override | ||
Object summarize() { | ||
return count == 0 ? null : total.divide(BigDecimal.valueOf(count), MathContext.DECIMAL128); | ||
} | ||
return current == null ? null : current.total / current.count; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.