From 8db7641401fc5f8e17cff1804e9e253c83e061dd Mon Sep 17 00:00:00 2001 From: Rasmus Viitanen Date: Wed, 29 Jan 2025 10:27:59 +0100 Subject: [PATCH 1/5] feat(spanner/spansql): Add support for tokenlist and create search index --- spanner/spansql/keywords.go | 13 +- spanner/spansql/parser.go | 216 ++++++++++++++++++++++++++++++++- spanner/spansql/parser_test.go | 56 +++++++++ spanner/spansql/sql.go | 59 +++++++++ spanner/spansql/types.go | 30 +++++ 5 files changed, 368 insertions(+), 6 deletions(-) diff --git a/spanner/spansql/keywords.go b/spanner/spansql/keywords.go index 6515a41e591e..da68f0a646aa 100644 --- a/spanner/spansql/keywords.go +++ b/spanner/spansql/keywords.go @@ -127,9 +127,11 @@ var keywords = map[string]bool{ // funcs is the set of reserved keywords that are functions. // https://cloud.google.com/spanner/docs/functions-and-operators -var funcs = make(map[string]bool) -var funcArgParsers = make(map[string]func(*parser) (Expr, *parseError)) -var aggregateFuncs = make(map[string]bool) +var ( + funcs = make(map[string]bool) + funcArgParsers = make(map[string]func(*parser) (Expr, *parseError)) + aggregateFuncs = make(map[string]bool) +) func init() { for _, f := range funcNames { @@ -234,6 +236,11 @@ var funcNames = []string{ "TRIM", "UPPER", + // Token functions. + "TOKENIZE_FULLTEXT", + "TOKENIZE_NUMBER", + "TOKEN", + // Array functions. "ARRAY", "ARRAY_CONCAT", diff --git a/spanner/spansql/parser.go b/spanner/spansql/parser.go index be9f92d9721c..857f61d52b34 100644 --- a/spanner/spansql/parser.go +++ b/spanner/spansql/parser.go @@ -962,6 +962,19 @@ func (p *parser) eat(want ...string) bool { return true } +func (p *parser) expectAny(any ...string) *parseError { + tok := p.next() + if tok.err != nil { + return tok.err + } + for _, w := range any { + if tok.caseEqual(w) { + return nil + } + } + return p.errorf("got %q while expecting any of %q", tok.value, any) +} + func (p *parser) expect(want ...string) *parseError { for _, w := range want { tok := p.next() @@ -980,7 +993,7 @@ func (p *parser) parseDDLStmt() (DDLStmt, *parseError) { /* statement: - { create_database | create_table | create_index | alter_table | drop_table | rename_table | drop_index | create_change_stream | alter_change_stream | drop_change_stream } + { create_database | create_table | create_index | create_search_index | alter_table | drop_table | rename_table | drop_index | create_change_stream | alter_change_stream | drop_change_stream } */ // TODO: support create_database @@ -991,6 +1004,9 @@ func (p *parser) parseDDLStmt() (DDLStmt, *parseError) { } else if p.sniff("CREATE", "INDEX") || p.sniff("CREATE", "UNIQUE", "INDEX") || p.sniff("CREATE", "NULL_FILTERED", "INDEX") || p.sniff("CREATE", "UNIQUE", "NULL_FILTERED", "INDEX") { ci, err := p.parseCreateIndex() return ci, err + } else if p.sniff("CREATE", "SEARCH", "INDEX") { + ci, err := p.parseCreateSearchIndex() + return ci, err } else if p.sniff("CREATE", "VIEW") || p.sniff("CREATE", "OR", "REPLACE", "VIEW") { cv, err := p.parseCreateView() return cv, err @@ -1361,6 +1377,132 @@ func (p *parser) parseCreateIndex() (*CreateIndex, *parseError) { return ci, nil } +func (p *parser) parseCreateSearchIndex() (*CreateSearchIndex, *parseError) { + debugf("parseCreateSearchIndex: %v", p) + + /* + CREATE SEARCH INDEX index_name + ON table_name ( token_column_list ) + [ storing_clause ] [ partition_clause ] + [ orderby_clause ] [ where_clause ] + [ interleave_clause ] [ options_clause ] + + where index_name is: + {a—z|A—Z}[{a—z|A—Z|0—9|_}+] + + and token_column_list is: + column_name [, ...] + + and storing_clause is: + STORING ( column_name [, ...] ) + + and partition_clause is: + PARTITION BY column_name [, ...] + + and orderby_clause is: + ORDER BY column_name [ {ASC | DESC} ] [, column_name [ {ASC | DESC} ]] + + and where_clause is: + WHERE column_name IS NOT NULL [AND ...] + + and interleave_clause is: + , INTERLEAVE IN table_name + + and options_clause is: + OPTIONS ( option_name=option_value [, ...] ) + + */ + + if err := p.expect("CREATE"); err != nil { + return nil, err + } + pos := p.Pos() + if err := p.expect("SEARCH", "INDEX"); err != nil { + return nil, err + } + + // Parse the index name + iname, err := p.parseTableOrIndexOrColumnName() + if err != nil { + return nil, err + } + + // Parse the table name + if err := p.expect("ON"); err != nil { + return nil, err + } + tname, err := p.parseTableOrIndexOrColumnName() + if err != nil { + return nil, err + } + ci := &CreateSearchIndex{ + Name: iname, + Table: tname, + Position: pos, + } + ci.Columns, err = p.parseKeyPartList() + if err != nil { + return nil, err + } + + if p.eat("STORING") { + ci.Storing, err = p.parseColumnNameList() + if err != nil { + return nil, err + } + } + + if p.eat("PARTITION", "BY") { + ci.PartitionBy, err = p.parseColumnNameList() + if err != nil { + return nil, err + } + } + + if p.eat("ORDER", "BY") { + ci.OrderBy, err = p.parseColumnNameList() + if err != nil { + return nil, err + } + } + + if p.eat("WHERE") { + for { + name, err := p.parseTableOrIndexOrColumnName() + if err != nil { + return nil, err + } + if err := p.expect("IS", "NOT", "NULL"); err != nil { + return nil, err + } + ci.WhereIsNotNull = append(ci.WhereIsNotNull, name) + + if !p.sniff("AND") { + break + } + if err := p.expect("AND"); err != nil { + return nil, err + } + } + } + + if p.eat(",", "INTERLEAVE", "IN") { + ci.Interleave, err = p.parseTableOrIndexOrColumnName() + if err != nil { + return nil, err + } + } + + if p.eat("OPTIONS") { + ci.Options, err = p.parseSearchIndexOptions() + if err != nil { + return nil, err + } + } + + return ci, nil +} + func (p *parser) parseCreateView() (*CreateView, *parseError) { debugf("parseCreateView: %v", p) @@ -1564,6 +1706,7 @@ func (p *parser) parseRevokeRole() (*RevokeRole, *parseError) { return r, nil } + func (p *parser) parseGrantOrRevokeRoleList(end string) ([]ID, *parseError) { var roleList []ID f := func(p *parser) *parseError { @@ -1626,6 +1769,7 @@ func (p *parser) parsePrivileges() ([]Privilege, *parseError) { } return privs, nil } + func (p *parser) parseAlterTable() (*AlterTable, *parseError) { debugf("parseAlterTable: %v", p) @@ -2054,7 +2198,7 @@ func (p *parser) parseColumnDef() (ColumnDef, *parseError) { /* column_def: - column_name {scalar_type | array_type} [NOT NULL] [{DEFAULT ( expression ) | AS ( expression ) STORED}] [options_def] + column_name {scalar_type | array_type} [NOT NULL] [{DEFAULT ( expression ) | AS ( expression ) {STORED | HIDDEN}}] [options_def] */ name, err := p.parseTableOrIndexOrColumnName() @@ -2091,7 +2235,7 @@ func (p *parser) parseColumnDef() (ColumnDef, *parseError) { if err := p.expect(")"); err != nil { return ColumnDef{}, err } - if err := p.expect("STORED"); err != nil { + if err := p.expectAny("STORED", "HIDDEN"); err != nil { return ColumnDef{}, err } } @@ -2163,6 +2307,70 @@ func (p *parser) parseColumnAlteration() (ColumnAlteration, *parseError) { return sct, nil } +func (p *parser) parseSearchIndexOptions() (SearchIndexOptions, *parseError) { + debugf("parseSearchIndexOptions: %v", p) + /* + options_def: + OPTIONS (sort_order_sharding = { true | false }, disable_automatic_uid_column = { true | false }) + */ + + if err := p.expect("OPTIONS"); err != nil { + return SearchIndexOptions{}, err + } + if err := p.expect("("); err != nil { + return SearchIndexOptions{}, err + } + + // TODO: Figure out if column options are case insensitive. + // We ignore case for the key (because it is easier) but not the value. + var opts SearchIndexOptions + for { + if p.eat("sort_order_sharding", "=") { + tok := p.next() + if tok.err != nil { + return SearchIndexOptions{}, tok.err + } + sortOrderSharding := new(bool) + switch tok.value { + case "true": + *sortOrderSharding = true + case "false": + *sortOrderSharding = false + default: + return SearchIndexOptions{}, p.errorf("got %q, want true or false", tok.value) + } + opts.SortOrderSharding = sortOrderSharding + } else if p.eat("disable_automatic_uid_column", "=") { + tok := p.next() + if tok.err != nil { + return SearchIndexOptions{}, tok.err + } + disableAutomaticUidColumn := new(bool) + switch tok.value { + case "true": + *disableAutomaticUidColumn = true + case "false": + *disableAutomaticUidColumn = false + default: + return SearchIndexOptions{}, p.errorf("got %q, want true or false", tok.value) + } + opts.DisableAutomaticUidColumn = disableAutomaticUidColumn + } + if p.sniff(")") { + break + } + if !p.eat(",") { + return SearchIndexOptions{}, p.errorf("missing ',' in options list") + } + } + + if err := p.expect(")"); err != nil { + return SearchIndexOptions{}, err + } + + return opts, nil +} + func (p *parser) parseColumnOptions() (ColumnOptions, *parseError) { debugf("parseColumnOptions: %v", p) /* @@ -2891,6 +3099,7 @@ func (p *parser) parseCreateSequence() (*CreateSequence, *parseError) { return cs, nil } + func (p *parser) parseCreateProtoBundle() (*CreateProtoBundle, *parseError) { debugf("parseCreateProtoBundle: %v", p) @@ -3107,6 +3316,7 @@ var baseTypes = map[string]TypeBase{ "JSON": JSON, "PROTO": Proto, // for use in CAST "ENUM": Enum, // for use in CAST + "TOKENLIST": Tokenlist, } func (p *parser) parseBaseType() (Type, *parseError) { diff --git a/spanner/spansql/parser_test.go b/spanner/spansql/parser_test.go index 79a27ddad780..6201ddb847a2 100644 --- a/spanner/spansql/parser_test.go +++ b/spanner/spansql/parser_test.go @@ -2186,6 +2186,62 @@ func TestParseDDL(t *testing.T) { }, }, }, + { + `CREATE TABLE TableTokens ( + Name STRING(MAX) NOT NULL, + Name_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(Name)) HIDDEN, + Value INT64 NOT NULL, + Value_Tokens TOKENLIST AS (TOKENIZE_NUMBER(Value)) HIDDEN, + Values ARRAY, + Values_Tokens TOKENLIST AS (TOKEN(Values)) HIDDEN, + ValueTwo INT64 NOT NULL, + ) PRIMARY KEY (Name); + + CREATE SEARCH INDEX TableTokensSearch + ON TableTokens(Name_Tokens, Value_Tokens) + STORING (ValueTwo);`, + &DDL{ + Filename: "filename", + List: []DDLStmt{ + &CreateTable{ + Name: "TableTokens", + Columns: []ColumnDef{ + {Name: "Name", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(2)}, + { + Name: "Name_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ID("Name")}}, + Position: line(3), + }, + {Name: "Value", Type: Type{Base: Int64}, NotNull: true, Position: line(4)}, + { + Name: "Value_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_NUMBER", Args: []Expr{ID("Value")}}, + Position: line(5), + }, + {Name: "Values", Type: Type{Array: true, Base: String, Len: MaxLen}, NotNull: false, Position: line(6)}, + { + Name: "Values_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKEN", Args: []Expr{ID("Values")}}, + Position: line(7), + }, + {Name: "ValueTwo", Type: Type{Base: Int64}, NotNull: true, Position: line(8)}, + }, + PrimaryKey: []KeyPart{{Column: "Name"}}, + Position: line(1), + }, + &CreateSearchIndex{ + Name: "TableTokensSearch", + Table: "TableTokens", + Columns: []KeyPart{ + {Column: "Name_Tokens"}, + {Column: "Value_Tokens"}, + }, + Storing: []ID{"ValueTwo"}, + Position: line(11), + }, + }, + }, + }, } for _, test := range tests { got, err := ParseDDL("filename", test.in) diff --git a/spanner/spansql/sql.go b/spanner/spansql/sql.go index 07ccc40febfa..9138d53b4e94 100644 --- a/spanner/spansql/sql.go +++ b/spanner/spansql/sql.go @@ -98,6 +98,63 @@ func (ci CreateIndex) SQL() string { return str } +func (csi CreateSearchIndex) SQL() string { + str := "CREATE SEARCH INDEX" + + str += csi.Name.SQL() + " ON " + csi.Table.SQL() + "(" + for i, c := range csi.Columns { + if i > 0 { + str += ", " + } + str += c.SQL() + } + str += ")" + if len(csi.Storing) > 0 { + str += " STORING (" + idList(csi.Storing, ", ") + ")" + } + + if len(csi.PartitionBy) > 0 { + str += " PARTITION BY (" + idList(csi.PartitionBy, ", ") + ")" + } + + if len(csi.OrderBy) > 0 { + str += " ORDER BY (" + idList(csi.OrderBy, ", ") + ")" + } + + if len(csi.WhereIsNotNull) > 0 { + str += " WHERE " + idList(csi.WhereIsNotNull, " IS NOT NULL AND") + // Remove last " AND" + str = str[:len(str)-4] + } + + if csi.Interleave != "" { + str += ", INTERLEAVE IN " + csi.Interleave.SQL() + } + + if csi.Options != (SearchIndexOptions{}) { + str += " " + csi.Options.SQL() + } + return str +} + +func (opts SearchIndexOptions) SQL() string { + str := "OPTIONS (" + hasOpt := false + if opts.DisableAutomaticUidColumn != nil { + hasOpt = true + str += fmt.Sprintf("disable_automatic_uid_column=%t", *opts.DisableAutomaticUidColumn) + } + if opts.DisableAutomaticUidColumn != nil { + if hasOpt { + str += ", " + } + hasOpt = true + str += fmt.Sprintf("sort_order_sharding=%t", *opts.SortOrderSharding) + } + str += ")" + return str +} + func (cp CreateProtoBundle) SQL() string { typeList := "" if len(cp.Types) > 0 { @@ -570,6 +627,7 @@ func (d *Delete) SQL() string { func (do DropProtoBundle) SQL() string { return "DROP PROTO BUNDLE" } + func (ap AlterProtoBundle) SQL() string { str := "ALTER PROTO BUNDLE" if len(ap.AddTypes) > 0 { @@ -741,6 +799,7 @@ func (pt PrivilegeType) SQL() string { } panic("unknown PrivilegeType") } + func (kp KeyPart) SQL() string { str := kp.Column.SQL() if kp.Desc { diff --git a/spanner/spansql/types.go b/spanner/spansql/types.go index 6a0bbc5167b7..097e65f2f460 100644 --- a/spanner/spansql/types.go +++ b/spanner/spansql/types.go @@ -121,6 +121,35 @@ func (*CreateIndex) isDDLStmt() {} func (ci *CreateIndex) Pos() Position { return ci.Position } func (ci *CreateIndex) clearOffset() { ci.Position.Offset = 0 } +// SearchIndexOptions represents options on a search index as part of a +// CREATE SEARCH INDEX statement. +type SearchIndexOptions struct { + SortOrderSharding *bool + DisableAutomaticUidColumn *bool +} + +// CreateSearchIndex represents a CREATE SEARCH INDEX statement. +// https://cloud.google.com/spanner/docs/data-definition-language#create-search-index +type CreateSearchIndex struct { + Name ID + Table ID + Columns []KeyPart + + Storing []ID + PartitionBy []ID + OrderBy []ID + WhereIsNotNull []ID + Interleave ID + Options SearchIndexOptions + + Position Position // position of the "CREATE" token +} + +func (ci *CreateSearchIndex) String() string { return fmt.Sprintf("%#v", ci) } +func (*CreateSearchIndex) isDDLStmt() {} +func (ci *CreateSearchIndex) Pos() Position { return ci.Position } +func (ci *CreateSearchIndex) clearOffset() { ci.Position.Offset = 0 } + // CreateView represents a CREATE [OR REPLACE] VIEW statement. // https://cloud.google.com/spanner/docs/data-definition-language#view_statements type CreateView struct { @@ -549,6 +578,7 @@ const ( JSON Proto Enum // Enum used in CAST expressions + Tokenlist ) type PrivilegeType int From 3415078e93370d9fdcf7160a799465de228805d8 Mon Sep 17 00:00:00 2001 From: Rasmus Viitanen Date: Thu, 30 Jan 2025 08:42:22 +0100 Subject: [PATCH 2/5] Fix lint --- spanner/spansql/parser.go | 8 ++++---- spanner/spansql/sql.go | 6 +++--- spanner/spansql/types.go | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/spanner/spansql/parser.go b/spanner/spansql/parser.go index 857f61d52b34..aef46a8ceb43 100644 --- a/spanner/spansql/parser.go +++ b/spanner/spansql/parser.go @@ -2345,16 +2345,16 @@ func (p *parser) parseSearchIndexOptions() (SearchIndexOptions, *parseError) { if tok.err != nil { return SearchIndexOptions{}, tok.err } - disableAutomaticUidColumn := new(bool) + disableAutomaticUIDColumn := new(bool) switch tok.value { case "true": - *disableAutomaticUidColumn = true + *disableAutomaticUIDColumn = true case "false": - *disableAutomaticUidColumn = false + *disableAutomaticUIDColumn = false default: return SearchIndexOptions{}, p.errorf("got %q, want true or false", tok.value) } - opts.DisableAutomaticUidColumn = disableAutomaticUidColumn + opts.DisableAutomaticUIDColumn = disableAutomaticUIDColumn } if p.sniff(")") { break diff --git a/spanner/spansql/sql.go b/spanner/spansql/sql.go index 9138d53b4e94..a0012ebd9adb 100644 --- a/spanner/spansql/sql.go +++ b/spanner/spansql/sql.go @@ -140,11 +140,11 @@ func (csi CreateSearchIndex) SQL() string { func (opts SearchIndexOptions) SQL() string { str := "OPTIONS (" hasOpt := false - if opts.DisableAutomaticUidColumn != nil { + if opts.DisableAutomaticUIDColumn != nil { hasOpt = true - str += fmt.Sprintf("disable_automatic_uid_column=%t", *opts.DisableAutomaticUidColumn) + str += fmt.Sprintf("disable_automatic_uid_column=%t", *opts.DisableAutomaticUIDColumn) } - if opts.DisableAutomaticUidColumn != nil { + if opts.DisableAutomaticUIDColumn != nil { if hasOpt { str += ", " } diff --git a/spanner/spansql/types.go b/spanner/spansql/types.go index 097e65f2f460..c23027580174 100644 --- a/spanner/spansql/types.go +++ b/spanner/spansql/types.go @@ -125,7 +125,7 @@ func (ci *CreateIndex) clearOffset() { ci.Position.Offset = 0 } // CREATE SEARCH INDEX statement. type SearchIndexOptions struct { SortOrderSharding *bool - DisableAutomaticUidColumn *bool + DisableAutomaticUIDColumn *bool } // CreateSearchIndex represents a CREATE SEARCH INDEX statement. From b207202f2210ac7c5e008b35fe3cb90ef79d41ac Mon Sep 17 00:00:00 2001 From: Rasmus Viitanen Date: Thu, 30 Jan 2025 08:43:33 +0100 Subject: [PATCH 3/5] Add all token functions --- spanner/spansql/keywords.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spanner/spansql/keywords.go b/spanner/spansql/keywords.go index da68f0a646aa..d8488e83892c 100644 --- a/spanner/spansql/keywords.go +++ b/spanner/spansql/keywords.go @@ -237,9 +237,12 @@ var funcNames = []string{ "UPPER", // Token functions. + "TOKEN", + "TOKENIZE_BOOL", "TOKENIZE_FULLTEXT", + "TOKENIZE_NGRAMS", "TOKENIZE_NUMBER", - "TOKEN", + "TOKENIZE_SUBSTRING", // Array functions. "ARRAY", From 10efbb4914e6e2ebd8b0d442f2616e62ab15fdca Mon Sep 17 00:00:00 2001 From: Rasmus Viitanen Date: Thu, 30 Jan 2025 09:07:34 +0100 Subject: [PATCH 4/5] Add all token functions --- spanner/spansql/keywords.go | 1 + spanner/spansql/parser_test.go | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/spanner/spansql/keywords.go b/spanner/spansql/keywords.go index d8488e83892c..640b3a743860 100644 --- a/spanner/spansql/keywords.go +++ b/spanner/spansql/keywords.go @@ -243,6 +243,7 @@ var funcNames = []string{ "TOKENIZE_NGRAMS", "TOKENIZE_NUMBER", "TOKENIZE_SUBSTRING", + "TOKENLIST_CONCAT", // Array functions. "ARRAY", diff --git a/spanner/spansql/parser_test.go b/spanner/spansql/parser_test.go index 6201ddb847a2..d60344951b82 100644 --- a/spanner/spansql/parser_test.go +++ b/spanner/spansql/parser_test.go @@ -2194,7 +2194,13 @@ func TestParseDDL(t *testing.T) { Value_Tokens TOKENLIST AS (TOKENIZE_NUMBER(Value)) HIDDEN, Values ARRAY, Values_Tokens TOKENLIST AS (TOKEN(Values)) HIDDEN, - ValueTwo INT64 NOT NULL, + ValueTwo BOOL NOT NULL, + ValueTwo_Tokens TOKENLIST AS (TOKENIZE_BOOL(ValueTwo)) HIDDEN, + ValueThree STRING(MAX) NOT NULL, + ValueThree_Tokens TOKENLIST AS (TOKENIZE_NGRAMS(ValueThree)) HIDDEN, + ValueFour STRING(MAX) NOT NULL, + ValueFour_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(ValueFour || "concat")) HIDDEN, + Combined_Tokens TOKENLIST AS (TOKENLIST_CONCAT([Name_Tokens, ValueFour_Tokens])) HIDDEN, ) PRIMARY KEY (Name); CREATE SEARCH INDEX TableTokensSearch @@ -2224,7 +2230,29 @@ func TestParseDDL(t *testing.T) { Generated: Func{Name: "TOKEN", Args: []Expr{ID("Values")}}, Position: line(7), }, - {Name: "ValueTwo", Type: Type{Base: Int64}, NotNull: true, Position: line(8)}, + {Name: "ValueTwo", Type: Type{Base: Bool}, NotNull: true, Position: line(8)}, + { + Name: "ValueTwo_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_BOOL", Args: []Expr{ID("ValueTwo")}}, + Position: line(9), + }, + {Name: "ValueThree", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(10)}, + { + Name: "ValueThree_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_NGRAMS", Args: []Expr{ID("ValueThree")}}, + Position: line(11), + }, + {Name: "ValueFour", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(12)}, + { + Name: "ValueFour_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ArithOp{Op: 5, LHS: ID("ValueFour"), RHS: StringLiteral("concat")}}}, + Position: line(13), + }, + { + Name: "Combined_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENLIST_CONCAT", Args: []Expr{Array{ID("Name_Tokens"), ID("ValueFour_Tokens")}}}, + Position: line(14), + }, }, PrimaryKey: []KeyPart{{Column: "Name"}}, Position: line(1), @@ -2237,7 +2265,7 @@ func TestParseDDL(t *testing.T) { {Column: "Value_Tokens"}, }, Storing: []ID{"ValueTwo"}, - Position: line(11), + Position: line(17), }, }, }, From 89f2302967c13ee732ea8c01093e06b891d62a00 Mon Sep 17 00:00:00 2001 From: Rasmus Viitanen Date: Thu, 30 Jan 2025 13:16:00 +0100 Subject: [PATCH 5/5] Add support for definition expressions --- spanner/spansql/keywords.go | 5 ++ spanner/spansql/parser.go | 62 +++++++++++++++----- spanner/spansql/parser_test.go | 40 ++++++++++++- spanner/spansql/sql.go | 17 +++++- spanner/spansql/sql_test.go | 104 +++++++++++++++++++++++++++++++++ spanner/spansql/types.go | 8 +++ 6 files changed, 219 insertions(+), 17 deletions(-) diff --git a/spanner/spansql/keywords.go b/spanner/spansql/keywords.go index 640b3a743860..25fcf5d39610 100644 --- a/spanner/spansql/keywords.go +++ b/spanner/spansql/keywords.go @@ -155,6 +155,11 @@ func init() { // Special case of SEQUENCE arg for GET_NEXT_SEQUENCE_VALUE, GET_INTERNAL_SEQUENCE_STATE funcArgParsers["GET_NEXT_SEQUENCE_VALUE"] = sequenceArgParser funcArgParsers["GET_INTERNAL_SEQUENCE_STATE"] = sequenceArgParser + // Special case for tokenization, which uses `[, key => value]` definitions + funcArgParsers["TOKENIZE_FULLTEXT"] = tokenDefinitionArgParser + funcArgParsers["TOKENIZE_NGRAMS"] = tokenDefinitionArgParser + funcArgParsers["TOKENIZE_NUMBER"] = tokenDefinitionArgParser + funcArgParsers["TOKENIZE_SUBSTRING"] = tokenDefinitionArgParser } var funcNames = []string{ diff --git a/spanner/spansql/parser.go b/spanner/spansql/parser.go index aef46a8ceb43..8ea562b38fe9 100644 --- a/spanner/spansql/parser.go +++ b/spanner/spansql/parser.go @@ -938,6 +938,24 @@ func (p *parser) sniff(want ...string) bool { return true } +// sniff reports whether the next N+skip tokens are as specified. +func (p *parser) sniff_ahead(skip int, want ...string) bool { + // Store current parser state and restore on the way out. + orig := *p + defer func() { *p = orig }() + + for i := 0; i < skip; i++ { + p.next() + } + + for _, w := range want { + if !p.next().caseEqual(w) { + return false + } + } + return true +} + // sniffTokenType reports whether the next token type is as specified. func (p *parser) sniffTokenType(want tokenType) bool { orig := *p @@ -962,19 +980,6 @@ func (p *parser) eat(want ...string) bool { return true } -func (p *parser) expectAny(any ...string) *parseError { - tok := p.next() - if tok.err != nil { - return tok.err - } - for _, w := range any { - if tok.caseEqual(w) { - return nil - } - } - return p.errorf("got %q while expecting any of %q", tok.value, any) -} - func (p *parser) expect(want ...string) *parseError { for _, w := range want { tok := p.next() @@ -2235,8 +2240,12 @@ func (p *parser) parseColumnDef() (ColumnDef, *parseError) { if err := p.expect(")"); err != nil { return ColumnDef{}, err } - if err := p.expectAny("STORED", "HIDDEN"); err != nil { - return ColumnDef{}, err + if p.eat("HIDDEN") { + cd.Hidden = true + } else { + if err := p.expect("STORED"); err != nil { + return ColumnDef{}, err + } } } @@ -4129,6 +4138,29 @@ var sequenceArgParser = func(p *parser) (Expr, *parseError) { return p.parseExpr() } +var tokenDefinitionArgParser = func(p *parser) (Expr, *parseError) { + if p.sniff_ahead(1, "=", ">") { + tok := p.next() + if tok.err != nil { + return DefinitionExpr{}, tok.err + } + definition := tok.value + if err := p.expect("=", ">"); err != nil { + return DefinitionExpr{}, err + } + value, err := p.parseExpr() + if err != nil { + return DefinitionExpr{}, err + } + return DefinitionExpr{ + Key: definition, + Value: value, + }, nil + } else { + return p.parseExpr() + } +} + func (p *parser) parseAggregateFunc() (Func, *parseError) { tok := p.next() if tok.err != nil { diff --git a/spanner/spansql/parser_test.go b/spanner/spansql/parser_test.go index d60344951b82..d5c1a0d95c87 100644 --- a/spanner/spansql/parser_test.go +++ b/spanner/spansql/parser_test.go @@ -2201,6 +2201,8 @@ func TestParseDDL(t *testing.T) { ValueFour STRING(MAX) NOT NULL, ValueFour_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(ValueFour || "concat")) HIDDEN, Combined_Tokens TOKENLIST AS (TOKENLIST_CONCAT([Name_Tokens, ValueFour_Tokens])) HIDDEN, + Argument_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(Name, token_category => "small")) HIDDEN, + ManyArgument_Tokens TOKENLIST AS (TOKENIZE_NUMBER(Value, comparison_type => "all", min => 1, max => 5)) HIDDEN, ) PRIMARY KEY (Name); CREATE SEARCH INDEX TableTokensSearch @@ -2216,43 +2218,79 @@ func TestParseDDL(t *testing.T) { { Name: "Name_Tokens", Type: Type{Base: Tokenlist}, Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ID("Name")}}, + Hidden: true, Position: line(3), }, {Name: "Value", Type: Type{Base: Int64}, NotNull: true, Position: line(4)}, { Name: "Value_Tokens", Type: Type{Base: Tokenlist}, Generated: Func{Name: "TOKENIZE_NUMBER", Args: []Expr{ID("Value")}}, + Hidden: true, Position: line(5), }, {Name: "Values", Type: Type{Array: true, Base: String, Len: MaxLen}, NotNull: false, Position: line(6)}, { Name: "Values_Tokens", Type: Type{Base: Tokenlist}, Generated: Func{Name: "TOKEN", Args: []Expr{ID("Values")}}, + Hidden: true, Position: line(7), }, {Name: "ValueTwo", Type: Type{Base: Bool}, NotNull: true, Position: line(8)}, { Name: "ValueTwo_Tokens", Type: Type{Base: Tokenlist}, Generated: Func{Name: "TOKENIZE_BOOL", Args: []Expr{ID("ValueTwo")}}, + Hidden: true, Position: line(9), }, {Name: "ValueThree", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(10)}, { Name: "ValueThree_Tokens", Type: Type{Base: Tokenlist}, Generated: Func{Name: "TOKENIZE_NGRAMS", Args: []Expr{ID("ValueThree")}}, + Hidden: true, Position: line(11), }, {Name: "ValueFour", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(12)}, { Name: "ValueFour_Tokens", Type: Type{Base: Tokenlist}, Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ArithOp{Op: 5, LHS: ID("ValueFour"), RHS: StringLiteral("concat")}}}, + Hidden: true, Position: line(13), }, { Name: "Combined_Tokens", Type: Type{Base: Tokenlist}, Generated: Func{Name: "TOKENLIST_CONCAT", Args: []Expr{Array{ID("Name_Tokens"), ID("ValueFour_Tokens")}}}, + Hidden: true, Position: line(14), }, + { + Name: "Argument_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ID("Name"), DefinitionExpr{ + Key: "token_category", + Value: StringLiteral("small"), + }}}, + Hidden: true, + Position: line(15), + }, + { + Name: "ManyArgument_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_NUMBER", Args: []Expr{ + ID("Value"), + DefinitionExpr{ + Key: "comparison_type", + Value: StringLiteral("all"), + }, + DefinitionExpr{ + Key: "min", + Value: IntegerLiteral(1), + }, + DefinitionExpr{ + Key: "max", + Value: IntegerLiteral(5), + }, + }}, + Hidden: true, + Position: line(16), + }, }, PrimaryKey: []KeyPart{{Column: "Name"}}, Position: line(1), @@ -2265,7 +2303,7 @@ func TestParseDDL(t *testing.T) { {Column: "Value_Tokens"}, }, Storing: []ID{"ValueTwo"}, - Position: line(17), + Position: line(19), }, }, }, diff --git a/spanner/spansql/sql.go b/spanner/spansql/sql.go index a0012ebd9adb..5450477369ed 100644 --- a/spanner/spansql/sql.go +++ b/spanner/spansql/sql.go @@ -700,7 +700,12 @@ func (cd ColumnDef) SQL() string { str += " DEFAULT (" + cd.Default.SQL() + ")" } if cd.Generated != nil { - str += " AS (" + cd.Generated.SQL() + ") STORED" + str += " AS (" + cd.Generated.SQL() + ")" + if cd.Hidden { + str += " HIDDEN" + } else { + str += " STORED" + } } if cd.Options != (ColumnOptions{}) { str += " " + cd.Options.SQL() @@ -782,7 +787,10 @@ func (tb TypeBase) SQL() string { return "PROTO" case Enum: return "ENUM" + case Tokenlist: + return "TOKENLIST" } + panic("unknown TypeBase") } @@ -1088,6 +1096,13 @@ func (ee ExtractExpr) addSQL(sb *strings.Builder) { ee.Expr.addSQL(sb) } +func (de DefinitionExpr) SQL() string { return buildSQL(de) } +func (de DefinitionExpr) addSQL(sb *strings.Builder) { + sb.WriteString(de.Key) + sb.WriteString(" => ") + de.Value.addSQL(sb) +} + func (aze AtTimeZoneExpr) SQL() string { return buildSQL(aze) } func (aze AtTimeZoneExpr) addSQL(sb *strings.Builder) { aze.Expr.addSQL(sb) diff --git a/spanner/spansql/sql_test.go b/spanner/spansql/sql_test.go index 67fb5520073e..d14e92b797c0 100644 --- a/spanner/spansql/sql_test.go +++ b/spanner/spansql/sql_test.go @@ -765,6 +765,110 @@ func TestSQL(t *testing.T) { ) PRIMARY KEY(id)`, reparseDDL, }, + { + &CreateTable{ + Name: "TableTokens", + Columns: []ColumnDef{ + {Name: "Name", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(2)}, + { + Name: "Name_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ID("Name")}}, + Hidden: true, + Position: line(3), + }, + {Name: "Value", Type: Type{Base: Int64}, NotNull: true, Position: line(4)}, + { + Name: "Value_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_NUMBER", Args: []Expr{ID("Value")}}, + Hidden: true, + Position: line(5), + }, + {Name: "Values", Type: Type{Array: true, Base: String, Len: MaxLen}, NotNull: false, Position: line(6)}, + { + Name: "Values_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKEN", Args: []Expr{ID("Values")}}, + Hidden: true, + Position: line(7), + }, + {Name: "ValueTwo", Type: Type{Base: Bool}, NotNull: true, Position: line(8)}, + { + Name: "ValueTwo_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_BOOL", Args: []Expr{ID("ValueTwo")}}, + Hidden: true, + Position: line(9), + }, + {Name: "ValueThree", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(10)}, + { + Name: "ValueThree_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_NGRAMS", Args: []Expr{ID("ValueThree")}}, + Hidden: true, + Position: line(11), + }, + {Name: "ValueFour", Type: Type{Base: String, Len: MaxLen}, NotNull: true, Position: line(12)}, + { + Name: "ValueFour_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ID("ValueFour")}}, + Hidden: true, + Position: line(13), + }, + { + Name: "Combined_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENLIST_CONCAT", Args: []Expr{Array{ID("Name_Tokens"), ID("ValueFour_Tokens")}}}, + Hidden: true, + Position: line(14), + }, + { + Name: "Argument_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_FULLTEXT", Args: []Expr{ID("Name"), DefinitionExpr{ + Key: "token_category", + Value: StringLiteral("small"), + }}}, + Hidden: true, + Position: line(15), + }, + { + Name: "ManyArgument_Tokens", Type: Type{Base: Tokenlist}, + Generated: Func{Name: "TOKENIZE_NUMBER", Args: []Expr{ + ID("Value"), + DefinitionExpr{ + Key: "comparison_type", + Value: StringLiteral("all"), + }, + DefinitionExpr{ + Key: "min", + Value: IntegerLiteral(1), + }, + DefinitionExpr{ + Key: "max", + Value: IntegerLiteral(5), + }, + }}, + Hidden: true, + Position: line(16), + }, + }, + PrimaryKey: []KeyPart{{Column: "Name"}}, + Position: line(1), + }, + `CREATE TABLE TableTokens ( + Name STRING(MAX) NOT NULL, + Name_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(Name)) HIDDEN, + Value INT64 NOT NULL, + Value_Tokens TOKENLIST AS (TOKENIZE_NUMBER(Value)) HIDDEN, + Values ARRAY, + Values_Tokens TOKENLIST AS (TOKEN(Values)) HIDDEN, + ValueTwo BOOL NOT NULL, + ValueTwo_Tokens TOKENLIST AS (TOKENIZE_BOOL(ValueTwo)) HIDDEN, + ValueThree STRING(MAX) NOT NULL, + ValueThree_Tokens TOKENLIST AS (TOKENIZE_NGRAMS(ValueThree)) HIDDEN, + ValueFour STRING(MAX) NOT NULL, + ValueFour_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(ValueFour)) HIDDEN, + Combined_Tokens TOKENLIST AS (TOKENLIST_CONCAT([Name_Tokens, ValueFour_Tokens])) HIDDEN, + Argument_Tokens TOKENLIST AS (TOKENIZE_FULLTEXT(Name, token_category => "small")) HIDDEN, + ManyArgument_Tokens TOKENLIST AS (TOKENIZE_NUMBER(Value, comparison_type => "all", min => 1, max => 5)) HIDDEN, +) PRIMARY KEY(Name)`, + reparseDDL, + }, { &CreateIndex{ Name: "Ia", diff --git a/spanner/spansql/types.go b/spanner/spansql/types.go index c23027580174..e32759e19993 100644 --- a/spanner/spansql/types.go +++ b/spanner/spansql/types.go @@ -501,6 +501,7 @@ type ColumnDef struct { Name ID Type Type NotNull bool + Hidden bool Default Expr // set if this column has a default value Generated Expr // set of this is a generated column @@ -849,6 +850,13 @@ type TypedExpr struct { func (TypedExpr) isBoolExpr() {} // possibly bool func (TypedExpr) isExpr() {} +type DefinitionExpr struct { + Key string + Value Expr +} + +func (DefinitionExpr) isExpr() {} + type ExtractExpr struct { Part string Type Type