Skip to content

Commit

Permalink
Refactoring Storage (#12119)
Browse files Browse the repository at this point in the history
  • Loading branch information
jdunkerley authored Jan 27, 2025
1 parent 5f53340 commit f25164d
Show file tree
Hide file tree
Showing 140 changed files with 1,759 additions and 2,133 deletions.
2 changes: 1 addition & 1 deletion distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -2366,7 +2366,7 @@ type Column
to_js_object self =
name = self.java_column.getName
storage = self.java_column.getStorage
storage_proxy = Array_Proxy.new storage.size i-> storage.getItemBoxed i
storage_proxy = Array_Proxy.new storage.getSize i-> storage.getItemBoxed i
storage_json = Vector.from_polyglot_array storage_proxy
JS_Object.from_pairs [["name", name], ["data", storage_json]]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ import project.Internal.Storage
import project.Value_Type.Value_Type
from project.Errors import Conversion_Failure

polyglot java import org.enso.table.data.column.storage.type.StorageType

## PRIVATE
Checks if one type can be cast into another and returns a dataflow error
explaining the situation if not.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ map_over_storage : Column -> (Any -> Text) -> (Integer -> Any) -> Boolean -> Pro
map_over_storage input_column function builder skip_nothing=True on_problems:Problem_Behavior=..Report_Warning =
problem_builder = Problem_Builder.new
input_storage = input_column.java_column.getStorage
num_input_rows = input_storage.size
num_input_rows = input_storage.getSize
output_storage_builder = builder num_input_rows
0.up_to num_input_rows . each i->
input_value = input_storage.getItemBoxed i
Expand All @@ -33,12 +33,12 @@ map_2_over_storage : Column -> Column -> (Any -> Any -> Text) -> (Integer -> Any
map_2_over_storage input_column_0 input_column_1 function builder skip_nothing=True =
input_storage_0 = input_column_0.java_column.getStorage
input_storage_1 = input_column_1.java_column.getStorage
case input_storage_0.size != input_storage_1.size of
case input_storage_0.getSize != input_storage_1.getSize of
True ->
msg = "Column lengths differ: " + input_storage_0.size.to_text + " != " + input_storage_1.size.to_text
msg = "Column lengths differ: " + input_storage_0.getSize.to_text + " != " + input_storage_1.getSize.to_text
Error.throw (Illegal_Argument.Error msg)
False ->
num_input_rows = input_storage_0.size
num_input_rows = input_storage_0.getSize
output_storage_builder = builder num_input_rows
ok = 0.up_to num_input_rows . each_propagate i->
input_value_0 = input_storage_0.getItemBoxed i
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ fan_out_to_rows_and_columns table input_column_id function column_names at_least
fan_out_to_rows_and_columns_fixed : Any -> (Any -> Vector (Vector Any)) -> Boolean -> Vector Text -> (Integer -> Any) -> Problem_Builder -> Vector
fan_out_to_rows_and_columns_fixed input_storage function at_least_one_row:Boolean column_names:Vector column_builder problem_builder =
num_output_columns = column_names.length
num_input_rows = input_storage.size
num_input_rows = input_storage.getSize

# Accumulates the outputs of the function.
output_column_builders = Vector.new num_output_columns _-> column_builder num_input_rows
Expand Down Expand Up @@ -176,7 +176,7 @@ fan_out_to_rows_and_columns_dynamic input_storage function at_least_one_row colu
output_column_builders = Builder.new

# Guess that most of the time, we'll get at least one value for each input.
num_input_rows = input_storage.size
num_input_rows = input_storage.getSize

# Column Builder add function
add_column n current_length =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ make_long_builder initial_size bits java_problem_aggregator=(Missing_Argument.en
make_string_builder : Integer -> Value_Type -> Builder
make_string_builder initial_size value_type=Value_Type.Char =
storage_type = Storage.from_value_type_strict value_type
Builder.getForType storage_type initial_size Nothing
Builder.getForText storage_type initial_size

## PRIVATE
make_inferred_builder : Integer -> ProblemAggregator -> Builder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ public static Table runReport(

var builders = new Builder[dimensions.size() + metrics.size()];
for (int i = 0; i < dimensions.size() + metrics.size(); i++) {
builders[i] = Builder.getForType(TextType.VARIABLE_LENGTH, rowCount, null);
builders[i] = Builder.getForText(TextType.VARIABLE_LENGTH, rowCount);
}

// Load the data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public Storage<?> seal() {
resize(currentSize);
return switch (mode) {
case LONG -> new LongStorage(ints, currentSize, intsMissing, IntegerType.INT_64);
case BIG_INTEGER -> new BigIntegerStorage(bigInts, currentSize);
case BIG_INTEGER -> new BigIntegerStorage(bigInts);
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public Object aggregate(List<Integer> indexes, ProblemAggregator problemAggregat
if (value == null || value instanceof String) {
String textValue = toQuotedString(value, quote, separator);

if (!separator.equals("") && quote.equals("") && textValue.contains(separator)) {
if (!separator.isEmpty() && quote.isEmpty() && textValue.contains(separator)) {
innerAggregator.reportColumnAggregatedProblem(
new UnquotedDelimiter(this.getName(), row, "Unquoted delimiter."));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import java.math.MathContext;
import java.util.List;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.ColumnDoubleStorage;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
import org.enso.table.data.column.storage.numeric.DoubleStorage;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.BigDecimalType;
import org.enso.table.data.column.storage.type.BigIntegerType;
Expand Down Expand Up @@ -82,18 +82,18 @@ private final class FloatMeanAccumulator extends MeanAccumulator {
void accumulate(
List<Integer> indexes, Storage<?> storage, ProblemAggregator problemAggregator) {
Context context = Context.getCurrent();
if (storage instanceof DoubleStorage doubleStorage) {
if (storage instanceof ColumnDoubleStorage doubleStorage) {
for (int i : indexes) {
if (!doubleStorage.isNothing(i)) {
total += doubleStorage.getItemAsDouble(i);
count++;
}
context.safepoint();
}
} else if (storage instanceof AbstractLongStorage longStorage) {
} else if (storage instanceof ColumnLongStorage longStorage) {
for (int i : indexes) {
if (!longStorage.isNothing(i)) {
total += longStorage.getItem(i);
total += longStorage.getItemAsLong(i);
count++;
}
context.safepoint();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.InferredIntegerBuilder;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnDoubleStorage;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
import org.enso.table.data.column.storage.numeric.DoubleStorage;
import org.enso.table.data.column.storage.type.BigIntegerType;
import org.enso.table.data.column.storage.type.FloatType;
import org.enso.table.data.column.storage.type.IntegerType;
Expand All @@ -34,8 +34,7 @@ public Sum(String name, Column column) {
public Builder makeBuilder(int size, ProblemAggregator problemAggregator) {
return switch (inputType) {
case IntegerType integerType -> new InferredIntegerBuilder(size, problemAggregator);
case BigIntegerType bigIntegerType -> Builder.getForType(
bigIntegerType, size, problemAggregator);
case BigIntegerType bigIntegerType -> Builder.getForBigInteger(size, problemAggregator);
case FloatType floatType -> Builder.getForDouble(floatType, size, problemAggregator);
case NullType nullType -> Builder.getForType(nullType, size, problemAggregator);
default -> throw new IllegalStateException(
Expand Down Expand Up @@ -90,16 +89,16 @@ void add(Object value) {
@Override
void accumulate(List<Integer> indexes, Storage<?> storage) {
Context context = Context.getCurrent();
if (storage instanceof AbstractLongStorage longStorage) {
if (storage instanceof ColumnLongStorage longStorage) {
for (int row : indexes) {
if (!longStorage.isNothing(row)) {
addLong(longStorage.getItem(row));
addLong(longStorage.getItemAsLong(row));
}
context.safepoint();
}
} else if (storage instanceof BigIntegerStorage bigIntegerStorage) {
for (int row : indexes) {
BigInteger value = bigIntegerStorage.getItem(row);
BigInteger value = bigIntegerStorage.getItemBoxed(row);
if (value != null) {
addBigInteger(value);
}
Expand Down Expand Up @@ -169,10 +168,10 @@ void add(Object value) {
@Override
void accumulate(List<Integer> indexes, Storage<?> storage) {
Context context = Context.getCurrent();
if (storage instanceof DoubleStorage doubleStorage) {
if (storage instanceof ColumnDoubleStorage doubleStorage) {
for (int row : indexes) {
if (!doubleStorage.isNothing(row)) {
addDouble(doubleStorage.getItem(row));
addDouble(doubleStorage.getItemAsDouble(row));
}
context.safepoint();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ public boolean accepts(Object o) {

@Override
protected Storage<BigDecimal> doSeal() {
return new BigDecimalStorage(data, currentSize);
return new BigDecimalStorage(data);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import java.math.BigInteger;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
import org.enso.table.data.column.storage.type.BigDecimalType;
import org.enso.table.data.column.storage.type.BigIntegerType;
Expand Down Expand Up @@ -45,7 +45,7 @@ public Builder retypeTo(StorageType type) {
return res;
}
case BigDecimalType _ -> {
var res = Builder.getForType(type, data.length, problemAggregator);
var res = Builder.getForBigDecimal(data.length);
for (int i = 0; i < currentSize; i++) {
if (data[i] == null) {
res.appendNulls(1);
Expand All @@ -61,7 +61,7 @@ public Builder retypeTo(StorageType type) {

@Override
protected Storage<BigInteger> doSeal() {
return new BigIntegerStorage(data, currentSize);
return new BigIntegerStorage(data);
}

@Override
Expand Down Expand Up @@ -98,14 +98,14 @@ static Builder retypeFromLongBuilder(LongBuilder longBuilder) {
@Override
public void appendBulkStorage(Storage<?> storage) {
if (storage.getType() instanceof IntegerType) {
if (storage instanceof AbstractLongStorage longStorage) {
int n = longStorage.size();
for (int i = 0; i < n; i++) {
if (storage instanceof ColumnLongStorage longStorage) {
long n = longStorage.getSize();
for (long i = 0; i < n; i++) {
if (storage.isNothing(i)) {
data[currentSize++] = null;
appendNulls(1);
} else {
long item = longStorage.getItem(i);
data[currentSize++] = BigInteger.valueOf(item);
long item = longStorage.getItemAsLong(i);
append(BigInteger.valueOf(item));
}
}
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.BitSet;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnBooleanStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.BooleanType;
import org.enso.table.data.column.storage.type.NullType;
Expand Down Expand Up @@ -64,17 +65,27 @@ public void appendNulls(int count) {
public void appendBulkStorage(Storage<?> storage) {
if (storage.getType().equals(getType())) {
if (storage instanceof BoolStorage boolStorage) {
BitSets.copy(boolStorage.getValues(), vals, size, boolStorage.size());
BitSets.copy(boolStorage.getIsNothingMap(), isNothing, size, boolStorage.size());
size += boolStorage.size();
// We know this is valid for a BoolStorage.
int toCopy = (int) boolStorage.getSize();
BitSets.copy(boolStorage.getValues(), vals, size, toCopy);
BitSets.copy(boolStorage.getIsNothingMap(), isNothing, size, toCopy);
size += toCopy;
} else if (storage instanceof ColumnBooleanStorage columnBooleanStorage) {
for (long i = 0; i < columnBooleanStorage.getSize(); i++) {
if (columnBooleanStorage.isNothing(i)) {
appendNulls(1);
} else {
appendBoolean(columnBooleanStorage.getItemAsBoolean(i));
}
}
} else {
throw new IllegalStateException(
"Unexpected storage implementation for type BOOLEAN: "
+ storage
+ ". This is a bug in the Table library.");
}
} else if (storage.getType() instanceof NullType) {
appendNulls(storage.size());
appendNulls(Math.toIntExact(storage.getSize()));
} else {
throw new StorageTypeMismatchException(getType(), storage.getType());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ public interface Builder {
* */
int MAX_SIZE = Integer.MAX_VALUE;

private static int checkSize(long size) {
/** Checks that the size is within the maximum allowed. */
static int checkSize(long size) {
if (size > MAX_SIZE) {
throw new IllegalArgumentException("Columns cannot exceed " + MAX_SIZE + " rows.");
}
Expand All @@ -56,7 +57,7 @@ static Builder getForType(StorageType type, long size, ProblemAggregator problem
case TimeOfDayType _ -> getForTime(size);
case FloatType floatType -> getForDouble(floatType, size, problemAggregator);
case IntegerType integerType -> getForLong(integerType, size, problemAggregator);
case TextType textType -> getForText(size, textType);
case TextType textType -> getForText(textType, size);
case BigDecimalType _ -> getForBigDecimal(size);
case BigIntegerType _ -> getForBigInteger(size, problemAggregator);
case NullType x -> new NullBuilder();
Expand Down Expand Up @@ -150,7 +151,7 @@ static BuilderForType<ZonedDateTime> getForDateTime(long size) {
return new DateTimeBuilder(checkedSize, false);
}

static BuilderForType<String> getForText(long size, TextType textType) {
static BuilderForType<String> getForText(TextType textType, long size) {
int checkedSize = checkSize(size);
return new StringBuilder(checkedSize, textType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public boolean accepts(Object o) {

@Override
protected Storage<LocalDate> doSeal() {
return new DateStorage(data, currentSize);
return new DateStorage(data);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import java.time.ZonedDateTime;
import java.util.BitSet;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.datetime.DateStorage;
import org.enso.table.data.column.storage.datetime.DateTimeStorage;
import org.enso.table.data.column.storage.type.DateTimeType;
import org.enso.table.data.column.storage.type.DateType;
Expand Down Expand Up @@ -49,25 +48,17 @@ public void append(Object o) {
@Override
public void appendBulkStorage(Storage<?> storage) {
if (storage.getType() instanceof DateType) {
if (storage instanceof DateStorage dateStorage) {
Context context = Context.getCurrent();
for (int i = 0; i < dateStorage.size(); ++i) {
LocalDate date = dateStorage.getItemBoxed(i);
if (date == null) {
data[currentSize++] = null;
} else {
data[currentSize++] = convertDate(date);
}

context.safepoint();
Context context = Context.getCurrent();
for (long i = 0; i < storage.getSize(); ++i) {
var date = storage.getItemBoxed(i);
if (date == null) {
appendNulls(1);
} else if (date instanceof LocalDate localDate) {
append(convertDate(localDate));
} else {
throw new IllegalStateException("Unexpected type in DateStorage: " + date.getClass());
}
} else {
throw new IllegalStateException(
"Unexpected storage implementation for type "
+ storage.getType()
+ ": "
+ storage
+ ". This is a bug in the Table library.");
context.safepoint();
}
} else {
super.appendBulkStorage(storage);
Expand All @@ -81,7 +72,7 @@ public boolean accepts(Object o) {

@Override
protected Storage<ZonedDateTime> doSeal() {
return new DateTimeStorage(data, currentSize);
return new DateTimeStorage(data);
}

@Override
Expand Down
Loading

0 comments on commit f25164d

Please sign in to comment.