Skip to content

Commit

Permalink
RAI-22855 fix issues with (u)int128 within value types (#102)
Browse files Browse the repository at this point in the history
* RAI-22855 fix issues with (u)int128 within value types

* minor

* fix handling of several int128 in value types

* revert commented out test suite

* minor

* minor

* run tests on PR

* bump

* fix yaml

* workflow dispatch

* bump

---------

Co-authored-by: Pete Vilter <[email protected]>
  • Loading branch information
wienleung and vilterp authored Mar 22, 2024
1 parent 74ddab6 commit 4614e6d
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 22 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/go-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ name: build/test

on:
push:
branches:
- '*'
schedule:
- cron: '30 5 * * *'
workflow_dispatch:

jobs:
build:
Expand Down
147 changes: 130 additions & 17 deletions rai/results.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ type Tabular interface {
Strings(int) []string
}

// TabularSlice extends Tabular for columns backed by array data in which a
// run of adjacent sub-columns can be projected as one column. This is how
// composite values that span several array elements, such as int128, are
// exposed.
type TabularSlice interface {
	Tabular
	// ColumnSlice returns a Column covering width adjacent sub-columns,
	// starting at sub-column cnum.
	ColumnSlice(cnum, width int) Column
}

type Relation interface {
Tabular
Showable
Expand Down Expand Up @@ -273,6 +281,8 @@ type listColumn[T any] struct {
cols []Column
}

// Compile-time check that listColumn satisfies TabularSlice.
var _ TabularSlice = (*listColumn[int])(nil)

// Column projects the single sub-column cnum of the list column as a
// listItemColumn that shares the same underlying data.
func (c listColumn[T]) Column(cnum int) Column {
	item := listItemColumn[T]{c.data, cnum, c.ncols}
	return item
}
Expand All @@ -287,6 +297,13 @@ func (c listColumn[T]) Columns() []Column {
return c.cols
}

// ColumnSlice projects width adjacent sub-columns starting at cnum as one
// Column. A width of exactly 1 yields the same listItemColumn that Column
// produces; any other width yields a listSliceColumn over the shared data.
func (c listColumn[T]) ColumnSlice(cnum int, width int) Column {
	switch width {
	case 1:
		return listItemColumn[T]{c.data, cnum, c.ncols}
	default:
		return listSliceColumn[T]{c.data, cnum, width, c.ncols}
	}
}

func (c listColumn[T]) GetItem(rnum int, out []T) {
roffs := rnum * c.ncols
for cnum := 0; cnum < c.ncols; cnum++ {
Expand Down Expand Up @@ -431,6 +448,58 @@ func (c listItemColumn[T]) Value(rnum int) any {
return c.Item(rnum)
}

// listSliceColumn represents several adjacent sub-columns of a `listColumn`
// that together form one column of a composite type (e.g. int128).
//
// NOTE: listColumn.ColumnSlice builds this struct with a positional literal,
// so the field order below must not change.
type listSliceColumn[T any] struct {
data []T // flat values, indexed as rnum*ncols + cnum + i by the accessors
cnum int // index of the first sub-column covered by this slice
width int // number of adjacent sub-columns covered by this slice
ncols int // number of sub-columns per row in data (the row stride)
}

// Compile-time check that listSliceColumn satisfies TabularColumn.
var _ TabularColumn[int] = (*listSliceColumn[int])(nil)

// Item returns a freshly allocated slice holding the width values that make
// up row rnum of this column slice.
func (c listSliceColumn[T]) Item(rnum int) []T {
	vals := make([]T, c.width)
	// Same copy that GetItem performs, written out against the new slice.
	start := rnum*c.ncols + c.cnum
	for k := range vals {
		vals[k] = c.data[start+k]
	}
	return vals
}

// GetItem copies the width values of row rnum into out, which must have room
// for at least width elements.
func (c listSliceColumn[T]) GetItem(rnum int, out []T) {
	// Offset of this slice's first value within row rnum.
	start := rnum*c.ncols + c.cnum
	for k := 0; k < c.width; k++ {
		out[k] = c.data[start+k]
	}
}

// NumCols always reports 1: the slice is presented as a single column
// regardless of how many sub-columns (width) it covers.
func (c listSliceColumn[T]) NumCols() int {
return 1
}

// Strings returns the asString rendering of each of the width values that
// make up row rnum, one string per covered sub-column.
func (c listSliceColumn[T]) Strings(rnum int) []string {
	strs := make([]string, c.width)
	start := rnum*c.ncols + c.cnum
	for k := 0; k < c.width; k++ {
		strs[k] = asString(c.data[start+k])
	}
	return strs
}

// NumRows reports the number of rows in the underlying data, computed as the
// total number of values divided by the number of sub-columns per row.
func (c listSliceColumn[T]) NumRows() int {
	total := len(c.data)
	return total / c.ncols
}

// String renders the whole item of row rnum (all width values together) with
// asString.
func (c listSliceColumn[T]) String(rnum int) string {
	item := c.Item(rnum)
	return asString(item)
}

// Type reports the element type T of the column, as produced by typeOf[T].
// Note that this is the type of one sub-column value, not of the []T that
// Item and Value return.
func (c listSliceColumn[T]) Type() any {
return typeOf[T]()
}

// Value returns the item of row rnum (a []T of length width) boxed as any.
func (c listSliceColumn[T]) Value(rnum int) any {
	var boxed any = c.Item(rnum)
	return boxed
}

// structColumn groups a list of member Columns into one column.
type structColumn struct {
cols []Column // the member columns, in order
}
Expand Down Expand Up @@ -1544,7 +1613,7 @@ func newBuiltinValueColumn(vt ValueType, c Column, nrows int) Column {
case "FixedDecimal":
return newDecimalColumn(vt, c)
case "Hash":
return newUint128Column(c.(listColumn[uint64]))
return newUint128Column(c.(TabularColumn[uint64]))
case "Rational":
return newRationalColumn(c)
case "Missing":
Expand Down Expand Up @@ -1574,27 +1643,71 @@ func newSimpleValueColumn(vt ValueType, c Column, nrows int) Column {
return valueColumn{cols}
}

// getSliceWidth returns how many adjacent sub-columns of an Arrow array
// column are occupied by `t`, where `t` is one of the parts of a Signature.
//
//   - A reflect.Type occupies 2 sub-columns when it is Int128Type or
//     Uint128Type, and 1 otherwise.
//   - A ValueType occupies the sum of the widths of its parts.
//   - Anything else (for example symbols and literal values) occupies no
//     sub-columns, so the result is 0.
func getSliceWidth(t any) int {
	switch tt := t.(type) {
	case reflect.Type:
		switch tt {
		// Go switch cases never fall through implicitly, so both 128-bit
		// types must be listed in one case. With a separate, empty
		// `case Int128Type:` the function reported a width of 0 for Int128.
		case Int128Type, Uint128Type:
			return 2
		default:
			return 1
		}
	case ValueType:
		width := 0
		for _, part := range tt {
			width += getSliceWidth(part)
		}
		return width
	default:
		return 0
	}
}

// Projects a valueColumn from an underlying `Tabular` column.
func newTabularValueColumn(vt ValueType, c Tabular, nrows int) Column {
ncol := 0
ncols := len(vt)
cols := make([]Column, ncols)
for i, t := range vt {
var cc Column
switch tt := t.(type) {
case reflect.Type:
cc = newRelationColumn(tt, c.Column(ncol), nrows)
ncol++
case ValueType:
cc = newValueColumn(tt, c.Column(ncol), nrows)
ncol++
case string:
cc = newSymbolColumn(tt, nrows)
default:
cc = newLiteralColumn(tt, nrows)
tcols := len(vt)
cols := make([]Column, tcols)

if tsc, ok := c.(TabularSlice); ok {
for i, t := range vt {
sliceWidth := getSliceWidth(t)
var cc Column
switch tt := t.(type) {
case reflect.Type:
cc = newRelationColumn(tt, tsc.ColumnSlice(ncol, sliceWidth), nrows)
ncol += sliceWidth
case ValueType:
cc = newValueColumn(tt, tsc.ColumnSlice(ncol, sliceWidth), nrows)
ncol += sliceWidth
case string:
cc = newSymbolColumn(tt, nrows)
default:
cc = newLiteralColumn(tt, nrows)
}
cols[i] = cc
}
} else {
for i, t := range vt {
var cc Column
switch tt := t.(type) {
case reflect.Type:
cc = newRelationColumn(tt, c.Column(ncol), nrows)
ncol++
case ValueType:
cc = newValueColumn(tt, c.Column(ncol), nrows)
ncol++
case string:
cc = newSymbolColumn(tt, nrows)
default:
cc = newLiteralColumn(tt, nrows)
}
cols[i] = cc
}
cols[i] = cc
}

return valueColumn{cols}
}

Expand Down
70 changes: 65 additions & 5 deletions rai/results_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1236,21 +1236,81 @@ var valueTypeTests = []execTest{
}

// extraValueTypeTests lists additional execTest cases for value types. Each
// case sets a query together with its mdata, pdata and rdata fixtures.
var extraValueTypeTests = []execTest{
// Tests RAI-22855
// Value type combining UnsignedInt[64] with FixedDecimal[128, 2]; the
// decimal is supplied in pdata as a two-element []uint64.
{
query: `
module Foo
module Bar
value type MyType = Int, Int
value type MyType = UnsignedInt[64], FixedDecimal[128, 2]
def output = ^MyType[uint[64, 1], decimal[128, 2, 2/3]]`,
mdata: mdata("0.arrow", sig("output",
vtype("MyType", Uint64Type, vtype("rel:base:FixedDecimal", int64(128), int64(2), Int128Type)))),
pdata: xdata("0.arrow", sig(StructType),
row([]any{uint64(1), []uint64{67, 0}})),
rdata: xdata("0.arrow",
sig("output", vtype("MyType", Uint64Type, DecimalType)),
row("output", value("MyType", uint64(1),
NewDecimal128(67, 0, -2)))),
},
// Value type wrapping a single Hash (Uint128Type), supplied in pdata as a
// two-element []uint64 row.
{
query: `
value type MyType = Hash
def h(x) = hash128["abc", _, x]
def output = ^MyType[h]`,
mdata: mdata("0.arrow", sig("output",
vtype("MyType", vtype("rel:base:Hash", Uint128Type)))),
pdata: xdata("0.arrow", sig(Uint64ListType),
row([]uint64{3877405323480549948, 3198683864092244389})),
rdata: xdata("0.arrow",
sig("output", vtype("MyType", BigIntType)),
row("output", value("MyType",
NewBigUint128(3877405323480549948, 3198683864092244389)))),
},
// Value type with two Hash members, supplied in pdata back to back as a
// four-element []uint64 row.
{
query: `
value type MyType = Hash, Hash
def h(x) = hash128["abc", _, x]
def output = ^MyType[h, h]`,
mdata: mdata("0.arrow", sig("output",
vtype("MyType", vtype("rel:base:Hash", Uint128Type), vtype("rel:base:Hash", Uint128Type)))),
pdata: xdata("0.arrow", sig(Uint64ListType),
row([]uint64{3877405323480549948, 3198683864092244389, 3877405323480549948, 3198683864092244389})),
rdata: xdata("0.arrow",
sig("output", vtype("MyType", BigIntType, BigIntType)),
row("output", value("MyType",
NewBigUint128(3877405323480549948, 3198683864092244389),
NewBigUint128(3877405323480549948, 3198683864092244389)))),
},
// End tests RAI-22855
{
query: `
module Foo
module Bar
value type MyType = Int, Int
end
end
end
def output = Foo:Bar:^MyType[12, 34]`,
def output = Foo:Bar:^MyType[12, 34]`,
mdata: mdata("0.arrow",
sig("output", vtype("Foo", "Bar", "MyType", Int64Type, Int64Type))),
pdata: xdata("0.arrow", sig(Int64ListType), row([]int64{12, 34})),
rdata: xdata("0.arrow",
sig("output", vtype("Foo", "Bar", "MyType", Int64Type, Int64Type)),
row("output", value("Foo", "Bar", "MyType", int64(12), int64(34)))),
},
// RAI-23484 There is a bug with nested value types
/*
{
query: `
value type Foo { Foo2 }
value type Foo2 {Int, SignedInt[128]}
def output { ^Foo[^Foo2[1, int128[2]]] }`,
mdata: mdata("0.arrow", sig("output",
vtype("Foo", vtype("Foo2", Int64Type, Int128Type)))),
pdata: xdata("0.arrow", sig(StructType),
row([]any{int64(1), []uint64{2, 0}})),
rdata: xdata("0.arrow",
sig("output", vtype("Foo", vtype("Foo2", Int64Type, BigIntType))),
row("output", value("Foo", vtype("Foo2", uint64(1), NewBigInt128(2, 0))))),
},
*/
}

var constValueTypeTests = []execTest{
Expand Down

0 comments on commit 4614e6d

Please sign in to comment.