Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(spanner/spansql): Add support for parsing tokenlist and create search index #11522

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions spanner/spansql/keywords.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,11 @@ var keywords = map[string]bool{

// funcs is the set of reserved keywords that are functions.
// https://cloud.google.com/spanner/docs/functions-and-operators
var funcs = make(map[string]bool)
var funcArgParsers = make(map[string]func(*parser) (Expr, *parseError))
var aggregateFuncs = make(map[string]bool)
var (
// funcs is the set of reserved keywords that are functions.
funcs = make(map[string]bool)
// funcArgParsers maps a string key to a custom parser that yields an Expr;
// presumably keyed by function name for functions whose arguments need
// special parsing — confirm against init.
funcArgParsers = make(map[string]func(*parser) (Expr, *parseError))
// aggregateFuncs is a string-keyed set; presumably it marks which function
// names are aggregate functions — confirm against init.
aggregateFuncs = make(map[string]bool)
)

func init() {
for _, f := range funcNames {
Expand Down Expand Up @@ -234,6 +236,11 @@ var funcNames = []string{
"TRIM",
"UPPER",

// Token functions.
rasviitanen marked this conversation as resolved.
Show resolved Hide resolved
"TOKENIZE_FULLTEXT",
"TOKENIZE_NUMBER",
"TOKEN",

// Array functions.
"ARRAY",
"ARRAY_CONCAT",
Expand Down
216 changes: 213 additions & 3 deletions spanner/spansql/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,19 @@
return true
}

// expectAny consumes the next token and returns nil if it matches (via
// tok.caseEqual) at least one of the candidate words in want. If the token
// carries a lexing error that error is returned; otherwise, when no candidate
// matches, a parse error naming the token and the candidates is returned.
//
// The token is consumed in every case, including on failure.
func (p *parser) expectAny(want ...string) *parseError {
	// The parameter is deliberately not named "any": that would shadow the
	// predeclared identifier of the same name inside this function.
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	for _, w := range want {
		if tok.caseEqual(w) {
			return nil
		}
	}
	return p.errorf("got %q while expecting any of %q", tok.value, want)
}

func (p *parser) expect(want ...string) *parseError {
for _, w := range want {
tok := p.next()
Expand All @@ -980,7 +993,7 @@

/*
statement:
{ create_database | create_table | create_index | alter_table | drop_table | rename_table | drop_index | create_change_stream | alter_change_stream | drop_change_stream }
{ create_database | create_table | create_index | create_search_index | alter_table | drop_table | rename_table | drop_index | create_change_stream | alter_change_stream | drop_change_stream }
*/

// TODO: support create_database
Expand All @@ -991,6 +1004,9 @@
} else if p.sniff("CREATE", "INDEX") || p.sniff("CREATE", "UNIQUE", "INDEX") || p.sniff("CREATE", "NULL_FILTERED", "INDEX") || p.sniff("CREATE", "UNIQUE", "NULL_FILTERED", "INDEX") {
ci, err := p.parseCreateIndex()
return ci, err
} else if p.sniff("CREATE", "SEARCH", "INDEX") {
ci, err := p.parseCreateSearchIndex()
return ci, err
} else if p.sniff("CREATE", "VIEW") || p.sniff("CREATE", "OR", "REPLACE", "VIEW") {
cv, err := p.parseCreateView()
return cv, err
Expand Down Expand Up @@ -1361,6 +1377,132 @@
return ci, nil
}

// parseCreateSearchIndex parses a CREATE SEARCH INDEX statement and returns
// the resulting *CreateSearchIndex, or the first parse error encountered.
//
// The optional clauses are recognised in a fixed order: STORING,
// PARTITION BY, ORDER BY, WHERE, ", INTERLEAVE IN" and OPTIONS. Each one is
// skipped when its leading keyword(s) are not the next tokens.
func (p *parser) parseCreateSearchIndex() (*CreateSearchIndex, *parseError) {
	debugf("parseCreateSearchIndex: %v", p)

	/*
		CREATE SEARCH INDEX index_name
			ON table_name ( token_column_list )
			[ storing_clause ] [ partition_clause ]
			[ orderby_clause ] [ where_clause ]
			[ interleave_clause ] [ options_clause ]

		where index_name is:
			{a-z|A-Z}[{a-z|A-Z|0-9|_}+]

		and token_column_list is:
			column_name [, ...]

		and storing_clause is:
			STORING ( column_name [, ...] )

		and partition_clause is:
			PARTITION BY column_name [, ...]

		and orderby_clause is:
			ORDER BY column_name [ {ASC | DESC} ] [, column_name [ {ASC | DESC} ]]

		and where_clause is:
			WHERE column_name IS NOT NULL [AND ...]

		and interleave_clause is:
			, INTERLEAVE IN table_name

		and options_clause is:
			OPTIONS ( option_name=option_value [, ...] )
	*/

	if err := p.expect("CREATE"); err != nil {
		return nil, err
	}
	// The statement position is recorded once CREATE has been consumed.
	pos := p.Pos()
	if err := p.expect("SEARCH", "INDEX"); err != nil {
		return nil, err
	}

	// Parse the index name.
	iname, err := p.parseTableOrIndexOrColumnName()
	if err != nil {
		return nil, err
	}

	// Parse the table name.
	if err := p.expect("ON"); err != nil {
		return nil, err
	}
	tname, err := p.parseTableOrIndexOrColumnName()
	if err != nil {
		return nil, err
	}

	ci := &CreateSearchIndex{
		Name:     iname,
		Table:    tname,
		Position: pos,
	}

	// Parse the tokenlist columns the index is built on.
	ci.Columns, err = p.parseKeyPartList()
	if err != nil {
		return nil, err
	}

	if p.eat("STORING") {
		ci.Storing, err = p.parseColumnNameList()
		if err != nil {
			return nil, err
		}
	}

	// NOTE(review): the grammar above shows PARTITION BY and ORDER BY taking
	// an unparenthesised column list, with optional ASC/DESC for ORDER BY.
	// Confirm that parseColumnNameList accepts that form.
	if p.eat("PARTITION", "BY") {
		ci.PartitionBy, err = p.parseColumnNameList()
		if err != nil {
			return nil, err
		}
	}

	if p.eat("ORDER", "BY") {
		ci.OrderBy, err = p.parseColumnNameList()
		if err != nil {
			return nil, err
		}
	}

	if p.eat("WHERE") {
		// One or more "column IS NOT NULL" terms joined by AND.
		for {
			name, err := p.parseTableOrIndexOrColumnName()
			if err != nil {
				return nil, err
			}
			if err := p.expect("IS", "NOT", "NULL"); err != nil {
				return nil, err
			}
			ci.WhereIsNotNull = append(ci.WhereIsNotNull, name)

			if !p.eat("AND") {
				break
			}
		}
	}

	if p.eat(",", "INTERLEAVE", "IN") {
		ci.Interleave, err = p.parseTableOrIndexOrColumnName()
		if err != nil {
			return nil, err
		}
	}

	// Only peek at OPTIONS here: parseSearchIndexOptions consumes the keyword
	// itself, so eating it at this point would make the clause unparseable.
	if p.sniff("OPTIONS") {
		ci.Options, err = p.parseSearchIndexOptions()
		if err != nil {
			return nil, err
		}
	}

	return ci, nil
}

func (p *parser) parseCreateView() (*CreateView, *parseError) {
debugf("parseCreateView: %v", p)

Expand Down Expand Up @@ -1564,6 +1706,7 @@

return r, nil
}

func (p *parser) parseGrantOrRevokeRoleList(end string) ([]ID, *parseError) {
var roleList []ID
f := func(p *parser) *parseError {
Expand Down Expand Up @@ -1626,6 +1769,7 @@
}
return privs, nil
}

func (p *parser) parseAlterTable() (*AlterTable, *parseError) {
debugf("parseAlterTable: %v", p)

Expand Down Expand Up @@ -2054,7 +2198,7 @@

/*
column_def:
column_name {scalar_type | array_type} [NOT NULL] [{DEFAULT ( expression ) | AS ( expression ) STORED}] [options_def]
column_name {scalar_type | array_type} [NOT NULL] [{DEFAULT ( expression ) | AS ( expression ) {STORED | HIDDEN}}] [options_def]
*/

name, err := p.parseTableOrIndexOrColumnName()
Expand Down Expand Up @@ -2091,7 +2235,7 @@
if err := p.expect(")"); err != nil {
return ColumnDef{}, err
}
if err := p.expect("STORED"); err != nil {
if err := p.expectAny("STORED", "HIDDEN"); err != nil {
return ColumnDef{}, err
}
}
Expand Down Expand Up @@ -2163,6 +2307,70 @@
return sct, nil
}

func (p *parser) parseSearchIndexOptions() (SearchIndexOptions, *parseError) {
debugf("parseSearchIndexOptions: %v", p)
/*
options_def:
OPTIONS (sort_order_sharding = { true | false }, disable_automatic_uid_column = { true | false })
*/

if err := p.expect("OPTIONS"); err != nil {
return SearchIndexOptions{}, err
}
if err := p.expect("("); err != nil {
return SearchIndexOptions{}, err
}

// TODO: Figure out if column options are case insensitive.
// We ignore case for the key (because it is easier) but not the value.
var opts SearchIndexOptions
for {
if p.eat("sort_order_sharding", "=") {
tok := p.next()
if tok.err != nil {
return SearchIndexOptions{}, tok.err
}
sortOrderSharding := new(bool)
switch tok.value {
case "true":
*sortOrderSharding = true
case "false":
*sortOrderSharding = false
default:
return SearchIndexOptions{}, p.errorf("got %q, want true or false", tok.value)
}
opts.SortOrderSharding = sortOrderSharding
} else if p.eat("disable_automatic_uid_column", "=") {
tok := p.next()
if tok.err != nil {
return SearchIndexOptions{}, tok.err
}
disableAutomaticUidColumn := new(bool)

Check failure on line 2348 in spanner/spansql/parser.go

View workflow job for this annotation

GitHub Actions / vet

var disableAutomaticUidColumn should be disableAutomaticUIDColumn
switch tok.value {
case "true":
*disableAutomaticUidColumn = true
case "false":
*disableAutomaticUidColumn = false
default:
return SearchIndexOptions{}, p.errorf("got %q, want true or false", tok.value)
}
opts.DisableAutomaticUidColumn = disableAutomaticUidColumn
}
if p.sniff(")") {
break
}
if !p.eat(",") {
return SearchIndexOptions{}, p.errorf("missing ',' in options list")
}
}

if err := p.expect(")"); err != nil {
return SearchIndexOptions{}, err
}

return opts, nil
}

func (p *parser) parseColumnOptions() (ColumnOptions, *parseError) {
debugf("parseColumnOptions: %v", p)
/*
Expand Down Expand Up @@ -2891,6 +3099,7 @@

return cs, nil
}

func (p *parser) parseCreateProtoBundle() (*CreateProtoBundle, *parseError) {
debugf("parseCreateProtoBundle: %v", p)

Expand Down Expand Up @@ -3107,6 +3316,7 @@
"JSON": JSON,
"PROTO": Proto, // for use in CAST
"ENUM": Enum, // for use in CAST
"TOKENLIST": Tokenlist,
}

func (p *parser) parseBaseType() (Type, *parseError) {
Expand Down
56 changes: 56 additions & 0 deletions spanner/spansql/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2186,6 +2186,62 @@ func TestParseDDL(t *testing.T) {
},
},
},
{
`CREATE TABLE TableTokens (
Name STRING(MAX) NOT NULL,
Name_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(Name)) HIDDEN,
Value INT64 NOT NULL,
Value_Tokens TOKENLIST AS (TOKENIZE_NUMBER(Value)) HIDDEN,
Values ARRAY<STRING(MAX)>,
Values_Tokens TOKENLIST AS (TOKEN(Values)) HIDDEN,
ValueTwo INT64 NOT NULL,
) PRIMARY KEY (Name);

CREATE SEARCH INDEX TableTokensSearch
ON TableTokens(Name_Tokens, Value_Tokens)
STORING (ValueTwo);`,
&DDL{
Filename: "filename",
List: []DDLStmt{
&CreateTable{
Name: "TableTokens",
Columns: []ColumnDef{
{Name: "Name", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(2)},
{
Name: "Name_Tokens", Type: Type{Base: Tokenlist},
Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ID("Name")}},
Position: line(3),
},
{Name: "Value", Type: Type{Base: Int64}, NotNull: true, Position: line(4)},
{
Name: "Value_Tokens", Type: Type{Base: Tokenlist},
Generated: Func{Name: "TOKENIZE_NUMBER", Args: []Expr{ID("Value")}},
Position: line(5),
},
{Name: "Values", Type: Type{Array: true, Base: String, Len: MaxLen}, NotNull: false, Position: line(6)},
{
Name: "Values_Tokens", Type: Type{Base: Tokenlist},
Generated: Func{Name: "TOKEN", Args: []Expr{ID("Values")}},
Position: line(7),
},
{Name: "ValueTwo", Type: Type{Base: Int64}, NotNull: true, Position: line(8)},
},
PrimaryKey: []KeyPart{{Column: "Name"}},
Position: line(1),
},
&CreateSearchIndex{
Name: "TableTokensSearch",
Table: "TableTokens",
Columns: []KeyPart{
{Column: "Name_Tokens"},
{Column: "Value_Tokens"},
},
Storing: []ID{"ValueTwo"},
Position: line(11),
},
},
},
},
}
for _, test := range tests {
got, err := ParseDDL("filename", test.in)
Expand Down
Loading
Loading