From 4614e6d56f1ebf5320271420f6f83545232396f8 Mon Sep 17 00:00:00 2001 From: Wien Leung <192698+wienleung@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:58:08 -0700 Subject: [PATCH] RAI-22855 fix issues with (u)int128 within value types (#102) * RAI-22855 fix issues with (u)int128 within value types * minor * fix handling of several int128 in value types * revert commented out test suite * minor * minor * run tests on PR * bump * fix yaml * workflow dispatch * bump --------- Co-authored-by: Pete Vilter --- .github/workflows/go-build.yaml | 3 + rai/results.go | 147 ++++++++++++++++++++++++++++---- rai/results_test.go | 70 +++++++++++++-- 3 files changed, 198 insertions(+), 22 deletions(-) diff --git a/.github/workflows/go-build.yaml b/.github/workflows/go-build.yaml index b4b6e12..5950117 100644 --- a/.github/workflows/go-build.yaml +++ b/.github/workflows/go-build.yaml @@ -2,8 +2,11 @@ name: build/test on: push: + branches: + - '*' schedule: - cron: '30 5 * * *' + workflow_dispatch: jobs: build: diff --git a/rai/results.go b/rai/results.go index d23d5dc..10bdf80 100644 --- a/rai/results.go +++ b/rai/results.go @@ -103,6 +103,14 @@ type Tabular interface { Strings(int) []string } +// TabularSlice is an interface for columns that contain array data that can +// be sliced into sub-arrays, combining the sub-array's values to represent +// values such as int128 +type TabularSlice interface { + Tabular + ColumnSlice(int, int) Column +} + type Relation interface { Tabular Showable @@ -273,6 +281,8 @@ type listColumn[T any] struct { cols []Column } +var _ TabularSlice = &listColumn[int]{} + func (c listColumn[T]) Column(cnum int) Column { return listItemColumn[T]{c.data, cnum, c.ncols} } @@ -287,6 +297,13 @@ func (c listColumn[T]) Columns() []Column { return c.cols } +func (c listColumn[T]) ColumnSlice(cnum int, width int) Column { + if width == 1 { + return listItemColumn[T]{c.data, cnum, c.ncols} + } + return listSliceColumn[T]{c.data, cnum, width, c.ncols} +} + 
func (c listColumn[T]) GetItem(rnum int, out []T) { roffs := rnum * c.ncols for cnum := 0; cnum < c.ncols; cnum++ { @@ -431,6 +448,58 @@ func (c listItemColumn[T]) Value(rnum int) any { return c.Item(rnum) } +// Represents several sub-columns of a `listColumn` that represent one column for a composite type (e.g. int128) +type listSliceColumn[T any] struct { + data []T + cnum int + width int + ncols int +} + +var _ TabularColumn[int] = &listSliceColumn[int]{} + +func (c listSliceColumn[T]) Item(rnum int) []T { + out := make([]T, c.width) + c.GetItem(rnum, out) + return out +} + +func (c listSliceColumn[T]) GetItem(rnum int, out []T) { + roffs := rnum * c.ncols + for i := 0; i < c.width; i++ { + out[i] = c.data[roffs+c.cnum+i] + } +} + +func (c listSliceColumn[T]) NumCols() int { + return 1 +} + +func (c listSliceColumn[T]) Strings(rnum int) []string { + roffs := rnum * c.ncols + result := make([]string, c.width) + for i := 0; i < c.width; i++ { + result[i] = asString(c.data[roffs+c.cnum+i]) + } + return result +} + +func (c listSliceColumn[T]) NumRows() int { + return len(c.data) / c.ncols +} + +func (c listSliceColumn[T]) String(rnum int) string { + return asString(c.Item(rnum)) +} + +func (c listSliceColumn[T]) Type() any { + return typeOf[T]() +} + +func (c listSliceColumn[T]) Value(rnum int) any { + return c.Item(rnum) +} + type structColumn struct { cols []Column } @@ -1544,7 +1613,7 @@ func newBuiltinValueColumn(vt ValueType, c Column, nrows int) Column { case "FixedDecimal": return newDecimalColumn(vt, c) case "Hash": - return newUint128Column(c.(listColumn[uint64])) + return newUint128Column(c.(TabularColumn[uint64])) case "Rational": return newRationalColumn(c) case "Missing": @@ -1574,27 +1643,71 @@ func newSimpleValueColumn(vt ValueType, c Column, nrows int) Column { return valueColumn{cols} } +// getSliceWidth gets the corresponding width of an Arrow array column for +// a `t` that is one of the parts of a Signature +func getSliceWidth(t any) int { + 
switch tt := t.(type) { + case reflect.Type: + switch tt { + case Int128Type, Uint128Type: + // a 128-bit value occupies two adjacent sub-columns of the list + return 2 + default: + return 1 + } + case ValueType: + ret := 0 + for _, st := range tt { + ret += getSliceWidth(st) + } + return ret + } + return 0 +} + // Projects a valueColumn from an underlying `Tabular` column. func newTabularValueColumn(vt ValueType, c Tabular, nrows int) Column { ncol := 0 - ncols := len(vt) - cols := make([]Column, ncols) - for i, t := range vt { - var cc Column - switch tt := t.(type) { - case reflect.Type: - cc = newRelationColumn(tt, c.Column(ncol), nrows) - ncol++ - case ValueType: - cc = newValueColumn(tt, c.Column(ncol), nrows) - ncol++ - case string: - cc = newSymbolColumn(tt, nrows) - default: - cc = newLiteralColumn(tt, nrows) + tcols := len(vt) + cols := make([]Column, tcols) + + if tsc, ok := c.(TabularSlice); ok { + for i, t := range vt { + sliceWidth := getSliceWidth(t) + var cc Column + switch tt := t.(type) { + case reflect.Type: + cc = newRelationColumn(tt, tsc.ColumnSlice(ncol, sliceWidth), nrows) + ncol += sliceWidth + case ValueType: + cc = newValueColumn(tt, tsc.ColumnSlice(ncol, sliceWidth), nrows) + ncol += sliceWidth + case string: + cc = newSymbolColumn(tt, nrows) + default: + cc = newLiteralColumn(tt, nrows) + } + cols[i] = cc + } + } else { + for i, t := range vt { + var cc Column + switch tt := t.(type) { + case reflect.Type: + cc = newRelationColumn(tt, c.Column(ncol), nrows) + ncol++ + case ValueType: + cc = newValueColumn(tt, c.Column(ncol), nrows) + ncol++ + case string: + cc = newSymbolColumn(tt, nrows) + default: + cc = newLiteralColumn(tt, nrows) + } + cols[i] = cc } - cols[i] = cc } + return valueColumn{cols} } diff --git a/rai/results_test.go b/rai/results_test.go index 057ecaa..b767e53 100644 --- a/rai/results_test.go +++ b/rai/results_test.go @@ -1236,14 +1236,58 @@ var valueTypeTests = []execTest{ } var extraValueTypeTests = []execTest{ + // Tests RAI-22855 { query: ` - module Foo - module 
Bar - value type MyType = Int, Int + value type MyType = UnsignedInt[64], FixedDecimal[128, 2] + def output = ^MyType[uint[64, 1], decimal[128, 2, 2/3]]`, + mdata: mdata("0.arrow", sig("output", + vtype("MyType", Uint64Type, vtype("rel:base:FixedDecimal", int64(128), int64(2), Int128Type)))), + pdata: xdata("0.arrow", sig(StructType), + row([]any{uint64(1), []uint64{67, 0}})), + rdata: xdata("0.arrow", + sig("output", vtype("MyType", Uint64Type, DecimalType)), + row("output", value("MyType", uint64(1), + NewDecimal128(67, 0, -2)))), + }, + { + query: ` + value type MyType = Hash + def h(x) = hash128["abc", _, x] + def output = ^MyType[h]`, + mdata: mdata("0.arrow", sig("output", + vtype("MyType", vtype("rel:base:Hash", Uint128Type)))), + pdata: xdata("0.arrow", sig(Uint64ListType), + row([]uint64{3877405323480549948, 3198683864092244389})), + rdata: xdata("0.arrow", + sig("output", vtype("MyType", BigIntType)), + row("output", value("MyType", + NewBigUint128(3877405323480549948, 3198683864092244389)))), + }, + { + query: ` + value type MyType = Hash, Hash + def h(x) = hash128["abc", _, x] + def output = ^MyType[h, h]`, + mdata: mdata("0.arrow", sig("output", + vtype("MyType", vtype("rel:base:Hash", Uint128Type), vtype("rel:base:Hash", Uint128Type)))), + pdata: xdata("0.arrow", sig(Uint64ListType), + row([]uint64{3877405323480549948, 3198683864092244389, 3877405323480549948, 3198683864092244389})), + rdata: xdata("0.arrow", + sig("output", vtype("MyType", BigIntType, BigIntType)), + row("output", value("MyType", + NewBigUint128(3877405323480549948, 3198683864092244389), + NewBigUint128(3877405323480549948, 3198683864092244389)))), + }, + // End tests RAI-22855 + { + query: ` + module Foo + module Bar + value type MyType = Int, Int + end end - end - def output = Foo:Bar:^MyType[12, 34]`, + def output = Foo:Bar:^MyType[12, 34]`, mdata: mdata("0.arrow", sig("output", vtype("Foo", "Bar", "MyType", Int64Type, Int64Type))), pdata: xdata("0.arrow", sig(Int64ListType), 
row([]int64{12, 34})), @@ -1251,6 +1295,22 @@ var extraValueTypeTests = []execTest{ sig("output", vtype("Foo", "Bar", "MyType", Int64Type, Int64Type)), row("output", value("Foo", "Bar", "MyType", int64(12), int64(34)))), }, + // RAI-23484 There is a bug with nested value types + /* + { + query: ` + value type Foo { Foo2 } + value type Foo2 {Int, SignedInt[128]} + def output { ^Foo[^Foo2[1, int128[2]]] }`, + mdata: mdata("0.arrow", sig("output", + vtype("Foo", vtype("Foo2", Int64Type, Int128Type)))), + pdata: xdata("0.arrow", sig(StructType), + row([]any{int64(1), []uint64{2, 0}})), + rdata: xdata("0.arrow", + sig("output", vtype("Foo", vtype("Foo2", Int64Type, BigIntType))), + row("output", value("Foo", vtype("Foo2", uint64(1), NewBigInt128(2, 0))))), + }, + */ } var constValueTypeTests = []execTest{