From 1788a80dc21930eaf1ca12e39c291bdb436bb3fd Mon Sep 17 00:00:00 2001
From: Rahul Rampure
Date: Thu, 14 Sep 2023 03:03:24 +0530
Subject: [PATCH] MB-58033: Support for custom datetime layouts (#1866)

## Jira

[MB-58033](https://issues.couchbase.com/browse/MB-58033)

## Description

- Add 'percentstyle' date time layout format that mimics 'strftime' used for date time layout specification in C and Python.
- Add 'isostyle' date time layout format that attempts to mimic 'DateTimeFormatter' which is used to specify date time layouts in Java.
- Added unit tests

---------

Co-authored-by: Abhinav Dangeti
---
 analysis/datetime/iso/iso.go                  | 267 ++++++++++++++++++
 analysis/datetime/iso/iso_test.go             |  89 +++++++
 analysis/datetime/percent/percent.go          | 167 ++++++++++++
 analysis/datetime/percent/percent_test.go     | 114 ++++++++
 analysis/datetime/sanitized/sanitized.go      |   2 +-
 analysis/datetime/sanitized/sanitized_test.go |  14 +
 config/config.go                              |   2 +
 query.go                                      |  34 +++
 search_test.go                                | 149 ++++++++++-
 9 files changed, 836 insertions(+), 2 deletions(-)
 create mode 100644 analysis/datetime/iso/iso.go
 create mode 100644 analysis/datetime/iso/iso_test.go
 create mode 100644 analysis/datetime/percent/percent.go
 create mode 100644 analysis/datetime/percent/percent_test.go

diff --git a/analysis/datetime/iso/iso.go b/analysis/datetime/iso/iso.go
new file mode 100644
index 000000000..cd75b1036
--- /dev/null
+++ b/analysis/datetime/iso/iso.go
@@ -0,0 +1,267 @@
+// Copyright (c) 2023 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package iso
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the name under which this date time parser is registered.
+const Name = "isostyle"
+
+var textLiteralDelimiter byte = '\'' // single quote
+
+// ISO style date strings are represented in
+// https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html
+//
+// Some format specifiers are not specified in go time package, such as:
+// - 'V' for timezone name, like 'Europe/Berlin' or 'America/New_York'.
+// - 'Q' for quarter of year, like Q3 or 3rd Quarter.
+// - 'zzzz' for full name of timezone like "Japan Standard Time" or "Eastern Standard Time".
+// - 'O' for localized zone-offset, like GMT+8 or GMT+08:00.
+// - '[]' for optional section of the format.
+// - 'G' for era, like AD or BC.
+// - 'W' for week of month.
+// - 'D' for day of year.
+// So date strings with these date elements cannot be parsed.
+//
+// timeElementToLayout maps a format letter and its repeat count to a go layout element.
+var timeElementToLayout = map[byte]map[int]string{
+	'M': {
+		4: "January", // MMMM = full month name
+		3: "Jan",     // MMM = short month name
+		2: "01",      // MM = month of year (2 digits) (01-12)
+		1: "1",       // M = month of year (1 digit) (1-12)
+	},
+	'd': {
+		2: "02", // dd = day of month (2 digits) (01-31)
+		1: "2",  // d = day of month (1 digit) (1-31)
+	},
+	'a': {
+		2: "pm", // aa = pm/am
+		1: "PM", // a = PM/AM
+	},
+	'H': {
+		2: "15", // HH = hour (24 hour clock) (2 digits)
+		1: "15", // H = hour (24 hour clock) (1 digit)
+	},
+	'm': {
+		2: "04", // mm = minute (2 digits)
+		1: "4",  // m = minute (1 digit)
+	},
+	's': {
+		2: "05", // ss = seconds (2 digits)
+		1: "5",  // s = seconds (1 digit)
+	},
+
+	// timezone offsets from UTC below
+	'X': {
+		5: "Z07:00:00", // XXXXX = timezone offset (+-hh:mm:ss)
+		4: "Z070000",   // XXXX = timezone offset (+-hhmmss)
+		3: "Z07:00",    // XXX = timezone offset (+-hh:mm)
+		2: "Z0700",     // XX = timezone offset (+-hhmm)
+		1: "Z07",       // X = timezone offset (+-hh)
+	},
+	'x': {
+		5: "-07:00:00", // xxxxx = timezone offset (+-hh:mm:ss)
+		4: "-070000",   // xxxx = timezone offset (+-hhmmss)
+		3: "-07:00",    // xxx = timezone offset (+-hh:mm)
+		2: "-0700",     // xx = timezone offset (+-hhmm)
+		1: "-07",       // x = timezone offset (+-hh)
+	},
+}
+
+// DateTimeParser parses date time strings by trying a list of go time layouts.
+type DateTimeParser struct {
+	layouts []string
+}
+
+// New returns a DateTimeParser for the given layouts, which must already be go
+// time layouts and not ISO style format strings.
+func New(layouts []string) *DateTimeParser {
+	return &DateTimeParser{
+		layouts: layouts,
+	}
+}
+
+// ParseDateTime tries the layouts in order and returns the parsed time along with
+// the first layout that matched, or analysis.ErrInvalidDateTime if none matched.
+func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
+	for _, layout := range p.layouts {
+		rv, err := time.Parse(layout, input)
+		if err == nil {
+			return rv, layout, nil
+		}
+	}
+	return time.Time{}, "", analysis.ErrInvalidDateTime
+}
+
+// letterCounter returns the length of the run of identical bytes in layout that
+// starts at idx.
+func letterCounter(layout string, idx int) int {
+	count := 1
+	for idx+count < len(layout) && layout[idx+count] == layout[idx] {
+		count++
+	}
+	return count
+}
+
+// invalidFormatError returns the error for an unsupported format specifier made
+// of character repeated count times.
+func invalidFormatError(character byte, count int) error {
+	return fmt.Errorf("invalid format string, unknown format specifier: %s", strings.Repeat(string(character), count))
+}
+
+// parseISOString converts an ISO style format string into the equivalent go
+// time layout.
+//
+// Text enclosed in single quotes is copied literally and two consecutive single
+// quotes produce one single quote. The letters a-z and A-Z are reserved for
+// format specifiers; an unsupported specifier or an unterminated quoted text
+// returns an error. Every other character is copied as is.
+func parseISOString(layout string) (string, error) {
+	var dateTimeLayout strings.Builder
+
+	for idx := 0; idx < len(layout); {
+		// check if the character is a text literal delimiter (')
+		if layout[idx] == textLiteralDelimiter {
+			if idx+1 < len(layout) && layout[idx+1] == textLiteralDelimiter {
+				// if the next character is also a text literal delimiter, then
+				// copy the character as is
+				dateTimeLayout.WriteByte(textLiteralDelimiter)
+				idx += 2
+				continue
+			}
+			// find the next text literal delimiter
+			for idx++; idx < len(layout); idx++ {
+				if layout[idx] == textLiteralDelimiter {
+					break
+				}
+				dateTimeLayout.WriteByte(layout[idx])
+			}
+			// idx can either be equal to len(layout) if the text literal delimiter is not found
+			// after the first text literal delimiter or it will be equal to the index of the
+			// second text literal delimiter
+			if idx == len(layout) {
+				// text literal delimiter not found error
+				return "", fmt.Errorf("invalid format string, expected text literal delimiter: %s", string(textLiteralDelimiter))
+			}
+			// increment idx to skip the second text literal delimiter
+			idx++
+			continue
+		}
+		// check if character is a letter in english alphabet - a-zA-Z which are reserved
+		// for format specifiers
+		if (layout[idx] >= 'a' && layout[idx] <= 'z') || (layout[idx] >= 'A' && layout[idx] <= 'Z') {
+			// find the number of times the character occurs consecutively
+			count := letterCounter(layout, idx)
+			character := layout[idx]
+			// first check the table
+			if fragment, ok := timeElementToLayout[character][count]; ok {
+				dateTimeLayout.WriteString(fragment)
+			} else {
+				switch character {
+				case 'y', 'u', 'Y':
+					// year; go can only express a 2 digit or a 4 digit year, so
+					// longer runs are rejected instead of emitting a padded
+					// layout that does not describe a year
+					if count == 2 {
+						dateTimeLayout.WriteString("06")
+					} else if count <= 4 {
+						dateTimeLayout.WriteString("2006")
+					} else {
+						return "", invalidFormatError(character, count)
+					}
+				case 'h', 'K':
+					// hour (1-12)
+					if count == 2 {
+						dateTimeLayout.WriteString("03")
+					} else if count == 1 {
+						dateTimeLayout.WriteString("3")
+					} else {
+						return "", invalidFormatError(character, count)
+					}
+				case 'E':
+					// day of week
+					if count == 4 {
+						dateTimeLayout.WriteString("Monday")
+					} else if count <= 3 {
+						dateTimeLayout.WriteString("Mon")
+					} else {
+						return "", invalidFormatError(character, count)
+					}
+				case 'S':
+					// fraction of second
+					// .SSS = millisecond
+					// .SSSSSS = microsecond
+					// .SSSSSSSSS = nanosecond
+					if count > 9 {
+						return "", invalidFormatError(character, count)
+					}
+					dateTimeLayout.WriteString(strings.Repeat(string('0'), count))
+				case 'z':
+					// timezone id
+					// NOTE(review): zzzz is listed as unsupported at the top of this
+					// file but is mapped to the abbreviation here - confirm intent
+					if count < 5 {
+						dateTimeLayout.WriteString("MST")
+					} else {
+						return "", invalidFormatError(character, count)
+					}
+				default:
+					return "", invalidFormatError(character, count)
+				}
+			}
+			idx += count
+		} else {
+			// copy the character as is
+			dateTimeLayout.WriteByte(layout[idx])
+			idx++
+		}
+	}
+	return dateTimeLayout.String(), nil
+}
+
+// DateTimeParserConstructor builds a DateTimeParser from config["layouts"], which
+// must be a list of ISO style format strings. Each string is converted to a go
+// time layout; a conversion error aborts the construction. Entries that are not
+// strings are skipped.
+func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
+	layouts, ok := config["layouts"].([]interface{})
+	if !ok {
+		return nil, fmt.Errorf("must specify layouts")
+	}
+	var layoutStrs []string
+	for _, layout := range layouts {
+		layoutStr, ok := layout.(string)
+		if ok {
+			layout, err := parseISOString(layoutStr)
+			if err != nil {
+				return nil, err
+			}
+			layoutStrs = append(layoutStrs, layout)
+		}
+	}
+	return New(layoutStrs), nil
+}
+
+func init() {
+	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
+}
diff --git a/analysis/datetime/iso/iso_test.go b/analysis/datetime/iso/iso_test.go
new file mode 100644
index 000000000..52681d196
--- /dev/null
+++ b/analysis/datetime/iso/iso_test.go
@@ -0,0 +1,89 @@
+// Copyright (c) 2023 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package iso
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestConversionFromISOStyle(t *testing.T) {
+	tests := []struct {
+		input  string
+		output string
+		err    error
+	}{
+		{
+			input:  "yyyy-MM-dd",
+			output: "2006-01-02",
+			err:    nil,
+		},
+		{
+			input:  "uuu/M''''dd'T'HH:m:ss.SSS",
+			output: "2006/1''02T15:4:05.000",
+			err:    nil,
+		},
+		{
+			input:  "YYYY-MM-dd'T'H:mm:ss zzz",
+			output: "2006-01-02T15:04:05 MST",
+			err:    nil,
+		},
+		{
+			input:  "MMMM dd yyyy', 'HH:mm:ss.SSS",
+			output: "January 02 2006, 15:04:05.000",
+			err:    nil,
+		},
+		{
+			input:  "h 'o'''' clock' a, XXX",
+			output: "3 o' clock PM, Z07:00",
+			err:    nil,
+		},
+		{
+			input:  "YYYY-MM-dd'T'HH:mm:ss'Z'",
+			output: "2006-01-02T15:04:05Z",
+			err:    nil,
+		},
+		{
+			input:  "E MMM d H:mm:ss z Y",
+			output: "Mon Jan 2 15:04:05 MST 2006",
+			err:    nil,
+		},
+		{
+			input:  "E MMM DD H:m:s z Y",
+			output: "",
+			err:    fmt.Errorf("invalid format string, unknown format specifier: DD"),
+		},
+		{
+			input:  "E MMM''''' H:m:s z Y",
+			output: "",
+			err:    fmt.Errorf("invalid format string, expected text literal delimiter: '"),
+		},
+		{
+			input:  "MMMMM dd yyyy', 'HH:mm:ss.SSS",
+			output: "",
+			err:    fmt.Errorf("invalid format string, unknown format specifier: MMMMM"),
+		},
+	}
+	for _, test := range tests {
+		out, err := parseISOString(test.input)
+		// compare the error text (a nil error prints as <nil>), not just its presence
+		if fmt.Sprint(err) != fmt.Sprint(test.err) {
+			t.Fatalf("expected error %v, got error %v", test.err, err)
+		}
+		if out != test.output {
+			t.Fatalf("expected output %v, got %v", test.output, out)
+		}
+	}
+}
diff --git 
a/analysis/datetime/percent/percent.go b/analysis/datetime/percent/percent.go
new file mode 100644
index 000000000..7e8202f2b
--- /dev/null
+++ b/analysis/datetime/percent/percent.go
@@ -0,0 +1,184 @@
+// Copyright (c) 2023 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package percent
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the name under which this date time parser is registered.
+const Name = "percentstyle"
+
+var formatDelimiter byte = '%'
+
+// format specifiers as per strftime in the C standard library
+// https://man7.org/linux/man-pages/man3/strftime.3.html
+var formatSpecifierToLayout = map[byte]string{
+	formatDelimiter: string(formatDelimiter), // %% = % (literal %)
+	'a':             "Mon",                   // %a = short weekday name
+	'A':             "Monday",                // %A = full weekday name
+	'd':             "02",                    // %d = day of month (2 digits) (01-31)
+	'e':             "2",                     // %e = day of month (1 digit) (1-31)
+	'b':             "Jan",                   // %b = short month name
+	'B':             "January",               // %B = full month name
+	'm':             "01",                    // %m = month of year (2 digits) (01-12)
+	'y':             "06",                    // %y = year without century
+	'Y':             "2006",                  // %Y = year with century
+	'H':             "15",                    // %H = hour (24 hour clock) (2 digits)
+	'I':             "03",                    // %I = hour (12 hour clock) (2 digits)
+	'l':             "3",                     // %l = hour (12 hour clock) (1 digit)
+	'p':             "PM",                    // %p = PM/AM
+	'P':             "pm",                    // %P = pm/am (lowercase)
+	'M':             "04",                    // %M = minute (2 digits)
+	'S':             "05",                    // %S = seconds (2 digits)
+	'f':             "999999",                // .%f = fraction of seconds - up to microseconds (6 digits) - deci/milli/micro
+	'Z':             "MST",                   // %Z = timezone name (GMT, JST, UTC etc)
+	// %z is present in timezone options
+
+	// some additional options not in strftime to support additional options such as
+	// disallow 0 padding in minute and seconds, nanosecond precision, etc
+	'o': "1",         // %o = month of year (1 digit) (1-12)
+	'i': "4",         // %i = minute (1 digit)
+	's': "5",         // %s = seconds (1 digit)
+	'N': "999999999", // .%N = fraction of seconds - up to nanoseconds (9 digits) - milli/micro/nano
+}
+
+// some additional options for timezone
+// such as allowing colon in timezone offset and specifying the seconds
+// timezone offsets are from UTC
+var timezoneOptions = map[string]string{
+	"z":   "Z0700",     // %z = timezone offset in +-hhmm / +-(2 digit hour)(2 digit minute) +0500, -0600 etc
+	"z:M": "Z07:00",    // %z:M = timezone offset(+-hh:mm) / +-(2 digit hour):(2 digit minute) +05:00, -06:00 etc
+	"z:S": "Z07:00:00", // %z:S = timezone offset(+-hh:mm:ss) / +-(2 digit hour):(2 digit minute):(2 digit second) +05:20:00, -06:30:00 etc
+	"zH":  "Z07",       // %zH = timezone offset(+-hh) / +-(2 digit hour) +05, -06 etc
+	"zS":  "Z070000",   // %zS = timezone offset(+-hhmmss) / +-(2 digit hour)(2 digit minute)(2 digit second) +052000, -063000 etc
+}
+
+// DateTimeParser parses date time strings by trying a list of go time layouts.
+type DateTimeParser struct {
+	layouts []string
+}
+
+// New returns a DateTimeParser for the given layouts, which must already be go
+// time layouts and not percent style format strings.
+func New(layouts []string) *DateTimeParser {
+	return &DateTimeParser{
+		layouts: layouts,
+	}
+}
+
+// checkTZOptions resolves the timezone offset specifier starting at idx, where
+// formatString[idx] is % and formatString[idx+1] is z. It returns the go layout
+// of the offset and the index from which the caller has to continue.
+func checkTZOptions(formatString string, idx int) (string, int) {
+	// idx is pointing to %
+	// idx + 1 is pointing to z
+	if idx+2 < len(formatString) {
+		if formatString[idx+2] == ':' {
+			// check if there is a character after the colon
+			if idx+3 < len(formatString) && (formatString[idx+3] == 'M' || formatString[idx+3] == 'S') {
+				return timezoneOptions["z:"+string(formatString[idx+3])], idx + 4
+			}
+			// %z: at the end OR %z:<other character> detected; return the default layout Z0700 and increment idx by 2 to print : literally
+			return timezoneOptions["z"], idx + 2
+		} else if formatString[idx+2] == 'H' || formatString[idx+2] == 'S' {
+			// %zH or %zS detected; return the layouts Z07 / Z070000 and increment idx by 3 to point to the next character
+			// after %zH or %zS
+			return timezoneOptions["z"+string(formatString[idx+2])], idx + 3
+		}
+	}
+	return timezoneOptions["z"], idx + 2
+}
+
+// parseFormatString converts a percent style format string into the equivalent
+// go time layout. Each % must be followed by a known format specifier, otherwise
+// an error is returned; use %% for a literal %. Every other character is copied
+// as is.
+func parseFormatString(formatString string) (string, error) {
+	var dateTimeLayout strings.Builder
+	// iterate over the format string and replace the format specifiers with
+	// the corresponding golang constants
+	for idx := 0; idx < len(formatString); {
+		// check if the character is a format delimiter (%)
+		if formatString[idx] == formatDelimiter {
+			// check if there is a character after the format delimiter (%)
+			if idx+1 >= len(formatString) {
+				return "", fmt.Errorf("invalid format string, expected character after %c", formatDelimiter)
+			}
+			formatSpecifier := formatString[idx+1]
+			if layout, ok := formatSpecifierToLayout[formatSpecifier]; ok {
+				dateTimeLayout.WriteString(layout)
+				idx += 2
+			} else if formatSpecifier == 'z' {
+				// did not find a valid specifier
+				// check if it is for timezone
+				var tzLayout string
+				tzLayout, idx = checkTZOptions(formatString, idx)
+				dateTimeLayout.WriteString(tzLayout)
+			} else {
+				return "", fmt.Errorf("invalid format string, unknown format specifier: %s", string(formatSpecifier))
+			}
+			continue
+		}
+		// copy the character as is
+		dateTimeLayout.WriteByte(formatString[idx])
+		idx++
+	}
+	return dateTimeLayout.String(), nil
+}
+
+// ParseDateTime tries the layouts in order and returns the parsed time along with
+// the first layout that matched, or analysis.ErrInvalidDateTime if none matched.
+func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
+	for _, layout := range p.layouts {
+		rv, err := time.Parse(layout, input)
+		if err == nil {
+			return rv, layout, nil
+		}
+	}
+	return time.Time{}, "", analysis.ErrInvalidDateTime
+}
+
+// DateTimeParserConstructor builds a DateTimeParser from config["layouts"], which
+// must be a list of percent style format strings. Each string is converted to a
+// go time layout; a conversion error aborts the construction. Entries that are
+// not strings are skipped.
+func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
+	layouts, ok := config["layouts"].([]interface{})
+	if !ok {
+		return nil, fmt.Errorf("must specify layouts")
+	}
+	var layoutStrs []string
+	for _, layout := range layouts {
+		layoutStr, ok := layout.(string)
+		if ok {
+			layout, err := parseFormatString(layoutStr)
+			if err != nil {
+				return nil, err
+			}
+			layoutStrs = append(layoutStrs, layout)
+		}
+	}
+	return New(layoutStrs), nil
+}
+
+func init() {
+	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
+}
diff --git a/analysis/datetime/percent/percent_test.go b/analysis/datetime/percent/percent_test.go
new file mode 100644
index 000000000..5b6932160
--- /dev/null
+++ b/analysis/datetime/percent/percent_test.go
@@ -0,0 +1,114 @@
+// Copyright (c) 2023 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package percent
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestConversionFromPercentStyle(t *testing.T) {
+	tests := []struct {
+		input  string
+		output string
+		err    error
+	}{
+		{
+			input:  "%Y-%m-%d",
+			output: "2006-01-02",
+			err:    nil,
+		},
+		{
+			input:  "%Y/%m%%%%%dT%H%M:%S",
+			output: "2006/01%%02T1504:05",
+			err:    nil,
+		},
+		{
+			input:  "%Y-%m-%dT%H:%M:%S %Z%z",
+			output: "2006-01-02T15:04:05 MSTZ0700",
+			err:    nil,
+		},
+		{
+			input:  "%B %e, %Y %l:%i %P %z:M",
+			output: "January 2, 2006 3:4 pm Z07:00",
+			err:    nil,
+		},
+		{
+			input:  "Hour %H Minute %Mseconds %S.%N Timezone:%Z:S, Weekday %a; Day %d Month %b, Year %y",
+			output: "Hour 15 Minute 04seconds 05.999999999 Timezone:MST:S, Weekday Mon; Day 02 Month Jan, Year 06",
+			err:    nil,
+		},
+		{
+			input:  "%Y-%m-%dT%H:%M:%S.%N",
+			output: "2006-01-02T15:04:05.999999999",
+			err:    nil,
+		},
+		{
+			input:  "%H:%M:%S %Z %z",
+			output: "15:04:05 MST Z0700",
+			err:    nil,
+		},
+		{
+			input:  "%H:%M:%S %Z %z:",
+			output: "15:04:05 MST Z0700:",
+			err:    nil,
+		},
+		{
+			input:  "%H:%M:%S %Z %z:M",
+			output: "15:04:05 MST Z07:00",
+			err:    nil,
+		},
+		{
+			input:  "%H:%M:%S %Z %z:A",
+			output: "15:04:05 MST Z0700:A",
+			err:    nil,
+		},
+		{
+			input:  "%H:%M:%S %Z %zM",
+			output: "15:04:05 MST Z0700M",
+			err:    nil,
+		},
+		{
+			input:  "%H:%M:%S %Z %zS",
+			output: "15:04:05 MST Z070000",
+			err:    nil,
+		},
+		{
+			input:  "%H:%M:%S %Z %z%Z %zS%z:%zH",
+			output: "15:04:05 MST Z0700MST Z070000Z0700:Z07",
+			err:    nil,
+		},
+		{
+			input:  "%Y-%m-%d%T%H:%M:%S %ZM",
+			output: "",
+			err:    fmt.Errorf("invalid format string, unknown format specifier: T"),
+		},
+		{
+			input:  "%Y-%m-%dT%H:%M:%S %ZM%",
+			output: "",
+			err:    fmt.Errorf("invalid format string, expected character after %%"),
+		},
+	}
+	for _, test := range tests {
+		out, err := parseFormatString(test.input)
+		// only the presence of an error is compared here, not its text
+		if err != nil && test.err == nil || err == nil && test.err != nil {
+			t.Fatalf("expected error %v, got error %v", test.err, err)
+		}
+		if out != test.output {
+			t.Fatalf("expected output %v, got %v", test.output, out)
+		}
+	}
+}
diff --git a/analysis/datetime/sanitized/sanitized.go b/analysis/datetime/sanitized/sanitized.go
index 33d271e6e..09eb94d1d 100644
--- a/analysis/datetime/sanitized/sanitized.go
+++ b/analysis/datetime/sanitized/sanitized.go
@@ -1,4 +1,4 @@
-// Copyright (c) 2014 Couchbase, Inc.
+// Copyright (c) 2023 Couchbase, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/analysis/datetime/sanitized/sanitized_test.go b/analysis/datetime/sanitized/sanitized_test.go
index d680b248b..d62e20aad 100644
--- a/analysis/datetime/sanitized/sanitized_test.go
+++ b/analysis/datetime/sanitized/sanitized_test.go
@@ -1,3 +1,17 @@
+// Copyright (c) 2023 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 package sanitized
 
 import (
diff --git a/config/config.go b/config/config.go
index 2f6df4f4d..492b86f74 100644
--- a/config/config.go
+++ b/config/config.go
@@ -70,7 +70,9 @@ import (
 
 	// date time parsers
 	_ "github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
+	_ "github.com/blevesearch/bleve/v2/analysis/datetime/iso"
 	_ "github.com/blevesearch/bleve/v2/analysis/datetime/optional"
+	_ "github.com/blevesearch/bleve/v2/analysis/datetime/percent"
 	_ "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized"
 	_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds"
 	_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds"
diff --git a/query.go b/query.go
index e18026ec1..3af750a06 100644
--- a/query.go
+++ b/query.go
@@ -68,6 +68,40 @@ func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusi
 	return query.NewDateRangeInclusiveQuery(start, end, startInclusive, endInclusive)
 }
 
+// NewDateRangeStringQuery creates a new Query for ranges
+// of date values given as date strings.
+// The date strings are parsed using the DateTimeParser
+// selected with the DateRangeStringQuery.SetDateTimeParser()
+// method.
+//
+// If no DateTimeParser is set, then the top-level
+// config.QueryDateTimeParser is used.
+//
+// NOTE(review): this wrapper forwards to query.NewDateRangeStringQuery
+// unchanged; endpoint inclusivity defaults are presumably decided there.
+func NewDateRangeStringQuery(start, end string) *query.DateRangeStringQuery {
+	return query.NewDateRangeStringQuery(start, end)
+}
+
+// NewDateRangeInclusiveStringQuery creates a new Query for ranges
+// of date values given as date strings.
+// The date strings are parsed using the DateTimeParser
+// selected with the DateRangeStringQuery.SetDateTimeParser()
+// method.
+//
+// This DateTimeParser can be a custom date time parser defined in the
+// index mapping using the AddCustomDateTimeParser() method.
+// If no DateTimeParser is set, then the top-level
+// config.QueryDateTimeParser is used.
+//
+// Either, but not both endpoints can be nil.
+// NOTE(review): start and end are strings here, so "nil" presumably
+// means the empty string - confirm against the query package.
+// startInclusive and endInclusive control inclusion of the endpoints.
+func NewDateRangeInclusiveStringQuery(start, end string, startInclusive, endInclusive *bool) *query.DateRangeStringQuery {
+	return query.NewDateRangeStringInclusiveQuery(start, end, startInclusive, endInclusive)
+}
+
 // NewDisjunctionQuery creates a new compound Query.
 // Result documents satisfy at least one Query.
 func NewDisjunctionQuery(disjuncts ...query.Query) *query.DisjunctionQuery {
diff --git a/search_test.go b/search_test.go
index 12ebc7bd4..37da8da0a 100644
--- a/search_test.go
+++ b/search_test.go
@@ -30,6 +30,8 @@ import (
 	html_char_filter "github.com/blevesearch/bleve/v2/analysis/char/html"
 	regexp_char_filter "github.com/blevesearch/bleve/v2/analysis/char/regexp"
 	"github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
+	"github.com/blevesearch/bleve/v2/analysis/datetime/iso"
+	"github.com/blevesearch/bleve/v2/analysis/datetime/percent"
 	"github.com/blevesearch/bleve/v2/analysis/datetime/sanitized"
 	"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds"
 	"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds"
@@ -2740,7 +2742,7 @@ func TestDateRangeStringQuery(t *testing.T) {
 
 	for _, dtq := range testQueries {
 		var err error
-		dateQuery := query.NewDateRangeStringInclusiveQuery(dtq.start, dtq.end, &dtq.includeStart, &dtq.includeEnd)
+		dateQuery := NewDateRangeInclusiveStringQuery(dtq.start, dtq.end, &dtq.includeStart, &dtq.includeEnd)
 		dateQuery.SetDateTimeParser(dtq.dateTimeParser)
 		dateQuery.SetField(dtq.field)
 
@@ -3229,3 +3231,148 @@ func TestDateRangeTimestampQueries(t *testing.T) {
 		}
 	}
 }
+
+func TestPercentAndIsoStyleDates(t *testing.T) {
+	percentName := percent.Name
+	isoName := iso.Name
+
+	imap := mapping.NewIndexMapping()
+	percentConfig := map[string]interface{}{
+		"type": percentName,
+		"layouts": []interface{}{
+			"%Y/%m/%d %l:%M%p",                // doc 1
+			"%d/%m/%Y %H:%M:%S",               // doc 2
+			"%Y-%m-%dT%H:%M:%S%z",             // doc 3
+			"%d %B %y %l%p %Z",                // doc 4
+			"%Y; %b %d (%a) %I:%M:%S.%N%P %z", // doc 5
+		},
+	}
+	isoConfig := map[string]interface{}{
+		"type": isoName,
+		"layouts": []interface{}{
+			"yyyy/MM/dd h:mma",                       // doc 1
+			"dd/MM/yyyy HH:mm:ss",                    // doc 2
+			"yyyy-MM-dd'T'HH:mm:ssXX",                // doc 3
+			"dd MMMM yy ha z",                        // doc 4
+			"yyyy; MMM dd (EEE) hh:mm:ss.SSSSSaa xx", // doc 5
+		},
+	}
+
+	err := imap.AddCustomDateTimeParser("percentDate", percentConfig)
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = imap.AddCustomDateTimeParser("isoDate", isoConfig)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	percentField := mapping.NewDateTimeFieldMapping()
+	percentField.DateFormat = "percentDate"
+
+	isoField := mapping.NewDateTimeFieldMapping()
+	isoField.DateFormat = "isoDate"
+
+	imap.DefaultMapping.AddFieldMappingsAt("percentDate", percentField)
+	imap.DefaultMapping.AddFieldMappingsAt("isoDate", isoField)
+
+	tmpIndexPath := createTmpIndexPath(t)
+	defer cleanupTmpIndexPath(t, tmpIndexPath)
+
+	idx, err := New(tmpIndexPath, imap)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		err = idx.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	documents := map[string]map[string]interface{}{
+		"doc1": {
+			"percentDate": "2001/08/20 6:00PM",
+			"isoDate":     "2001/08/20 6:00PM",
+		},
+		"doc2": {
+			"percentDate": "20/08/2001 18:05:00",
+			"isoDate":     "20/08/2001 18:05:00",
+		},
+		"doc3": {
+			"percentDate": "2001-08-20T18:10:00Z",
+			"isoDate":     "2001-08-20T18:10:00Z",
+		},
+		"doc4": {
+			"percentDate": "20 August 01 6PM UTC",
+			"isoDate":     "20 August 01 6PM UTC",
+		},
+		"doc5": {
+			"percentDate": "2001; Aug 20 (Mon) 06:15:15.23456pm +0000",
+			"isoDate":     "2001; Aug 20 (Mon) 06:15:15.23456pm +0000",
+		},
+	}
+
+	batch := idx.NewBatch()
+	for docID, doc := range documents {
+		err := batch.Index(docID, doc)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+	err = idx.Batch(batch)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	type testStruct struct {
+		start string
+		end   string
+		field string
+	}
+
+	for _, field := range []string{"percentDate", "isoDate"} {
+		testQueries := []testStruct{
+			{
+				start: "2001/08/20 6:00PM",
+				end:   "2001/08/20 6:20PM",
+				field: field,
+			},
+			{
+				start: "20/08/2001 18:00:00",
+				end:   "20/08/2001 18:20:00",
+				field: field,
+			},
+			{
+				start: "2001-08-20T18:00:00Z",
+				end:   "2001-08-20T18:20:00Z",
+				field: field,
+			},
+			{
+				start: "20 August 01 6PM UTC",
+				end:   "20 August 01 7PM UTC",
+				field: field,
+			},
+			{
+				start: "2001; Aug 20 (Mon) 06:00:00.00000pm +0000",
+				end:   "2001; Aug 20 (Mon) 06:20:20.00000pm +0000",
+				field: field,
+			},
+		}
+		includeStart := true
+		includeEnd := true
+		for _, dtq := range testQueries {
+			drq := NewDateRangeInclusiveStringQuery(dtq.start, dtq.end, &includeStart, &includeEnd)
+			drq.SetField(dtq.field)
+			drq.SetDateTimeParser(field)
+			sr := NewSearchRequest(drq)
+			res, err := idx.Search(sr)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if len(res.Hits) != 5 {
+				t.Fatalf("expected %d hits, got %d", 5, len(res.Hits))
+			}
+		}
+	}
+}