Skip to content

Commit

Permalink
MB-58033: Support for custom datetime layouts (#1866)
Browse files Browse the repository at this point in the history
## Jira

[MB-58033](https://issues.couchbase.com/browse/MB-58033)

## Description
- Add 'percentstyle' date time layout format that mimics 'strftime' used
for date time layout specification in C and Python.
- Add 'isostyle' date time layout format that attempts to mimic
'DateTimeFormatter' which is used to specify date time layouts in Java.
- Added unit tests

---------

Co-authored-by: Abhinav Dangeti <[email protected]>
  • Loading branch information
CascadingRadium and abhinavdangeti authored Sep 13, 2023
1 parent 3c4461c commit 1788a80
Show file tree
Hide file tree
Showing 9 changed files with 812 additions and 2 deletions.
243 changes: 243 additions & 0 deletions analysis/datetime/iso/iso.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package iso

import (
"fmt"
"strings"
"time"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)

const Name = "isostyle"

var textLiteralDelimiter byte = '\'' // single quote

// ISO style date strings are represented in
// https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html
//
// Some format specifiers are not specified in go time package, such as:
// - 'V' for timezone name, like 'Europe/Berlin' or 'America/New_York'.
// - 'Q' for quarter of year, like Q3 or 3rd Quarter.
// - 'zzzz' for full name of timezone like "Japan Standard Time" or "Eastern Standard Time".
// - 'O' for localized zone-offset, like GMT+8 or GMT+08:00.
// - '[]' for optional section of the format.
// - 'G' for era, like AD or BC.
// - 'W' for week of month.
// - 'D' for day of year.
// So date strings with these date elements cannot be parsed.
var timeElementToLayout = map[byte]map[int]string{
'M': {
4: "January", // MMMM = full month name
3: "Jan", // MMM = short month name
2: "01", // MM = month of year (2 digits) (01-12)
1: "1", // M = month of year (1 digit) (1-12)
},
'd': {
2: "02", // dd = day of month (2 digits) (01-31)
1: "2", // d = day of month (1 digit) (1-31)
},
'a': {
2: "pm", // aa = pm/am
1: "PM", // a = PM/AM
},
'H': {
2: "15", // HH = hour (24 hour clock) (2 digits)
1: "15", // H = hour (24 hour clock) (1 digit)
},
'm': {
2: "04", // mm = minute (2 digits)
1: "4", // m = minute (1 digit)
},
's': {
2: "05", // ss = seconds (2 digits)
1: "5", // s = seconds (1 digit)
},

// timezone offsets from UTC below
'X': {
5: "Z07:00:00", // XXXXX = timezone offset (+-hh:mm:ss)
4: "Z070000", // XXXX = timezone offset (+-hhmmss)
3: "Z07:00", // XXX = timezone offset (+-hh:mm)
2: "Z0700", // XX = timezone offset (+-hhmm)
1: "Z07", // X = timezone offset (+-hh)
},
'x': {
5: "-07:00:00", // xxxxx = timezone offset (+-hh:mm:ss)
4: "-070000", // xxxx = timezone offset (+-hhmmss)
3: "-07:00", // xxx = timezone offset (+-hh:mm)
2: "-0700", // xx = timezone offset (+-hhmm)
1: "-07", // x = timezone offset (+-hh)
},
}

type DateTimeParser struct {
layouts []string
}

func New(layouts []string) *DateTimeParser {
return &DateTimeParser{
layouts: layouts,
}
}

func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
for _, layout := range p.layouts {
rv, err := time.Parse(layout, input)
if err == nil {
return rv, layout, nil
}
}
return time.Time{}, "", analysis.ErrInvalidDateTime
}

func letterCounter(layout string, idx int) int {
count := 1
for idx+count < len(layout) {
if layout[idx+count] == layout[idx] {
count++
} else {
break
}
}
return count
}

func invalidFormatError(character byte, count int) error {
return fmt.Errorf("invalid format string, unknown format specifier: " + strings.Repeat(string(character), count))
}

func parseISOString(layout string) (string, error) {
var dateTimeLayout strings.Builder

for idx := 0; idx < len(layout); {
// check if the character is a text literal delimiter (')
if layout[idx] == textLiteralDelimiter {
if idx+1 < len(layout) && layout[idx+1] == textLiteralDelimiter {
// if the next character is also a text literal delimiter, then
// copy the character as is
dateTimeLayout.WriteByte(textLiteralDelimiter)
idx += 2
continue
}
// find the next text literal delimiter
for idx++; idx < len(layout); idx++ {
if layout[idx] == textLiteralDelimiter {
break
}
dateTimeLayout.WriteByte(layout[idx])
}
// idx can either be equal to len(layout) if the text literal delimiter is not found
// after the first text literal delimiter or it will be equal to the index of the
// second text literal delimiter
if idx == len(layout) {
// text literal delimiter not found error
return "", fmt.Errorf("invalid format string, expected text literal delimiter: " + string(textLiteralDelimiter))
}
// increment idx to skip the second text literal delimiter
idx++
continue
}
// check if character is a letter in english alphabet - a-zA-Z which are reserved
// for format specifiers
if (layout[idx] >= 'a' && layout[idx] <= 'z') || (layout[idx] >= 'A' && layout[idx] <= 'Z') {
// find the number of times the character occurs consecutively
count := letterCounter(layout, idx)
character := layout[idx]
// first check the table
if layout, ok := timeElementToLayout[character][count]; ok {
dateTimeLayout.WriteString(layout)
} else {
switch character {
case 'y', 'u', 'Y':
// year
if count == 2 {
dateTimeLayout.WriteString("06")
} else {
format := fmt.Sprintf("%%0%ds", count)
dateTimeLayout.WriteString(fmt.Sprintf(format, "2006"))
}
case 'h', 'K':
// hour (1-12)
if count == 2 {
dateTimeLayout.WriteString("03")
} else if count == 1 {
dateTimeLayout.WriteString("3")
} else {
return "", invalidFormatError(character, count)
}
case 'E':
// day of week
if count == 4 {
dateTimeLayout.WriteString("Monday")
} else if count <= 3 {
dateTimeLayout.WriteString("Mon")
} else {
return "", invalidFormatError(character, count)
}
case 'S':
// fraction of second
// .SSS = millisecond
// .SSSSSS = microsecond
// .SSSSSSSSS = nanosecond
if count > 9 {
return "", invalidFormatError(character, count)
}
dateTimeLayout.WriteString(strings.Repeat(string('0'), count))
case 'z':
// timezone id
if count < 5 {
dateTimeLayout.WriteString("MST")
} else {
return "", invalidFormatError(character, count)
}
default:
return "", invalidFormatError(character, count)
}
}
idx += count
} else {
// copy the character as is
dateTimeLayout.WriteByte(layout[idx])
idx++
}
}
return dateTimeLayout.String(), nil
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
layouts, ok := config["layouts"].([]interface{})
if !ok {
return nil, fmt.Errorf("must specify layouts")
}
var layoutStrs []string
for _, layout := range layouts {
layoutStr, ok := layout.(string)
if ok {
layout, err := parseISOString(layoutStr)
if err != nil {
return nil, err
}
layoutStrs = append(layoutStrs, layout)
}
}
return New(layoutStrs), nil
}

func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
89 changes: 89 additions & 0 deletions analysis/datetime/iso/iso_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package iso

import (
"fmt"
"testing"
)

func TestConversionFromISOStyle(t *testing.T) {
tests := []struct {
input string
output string
err error
}{
{
input: "yyyy-MM-dd",
output: "2006-01-02",
err: nil,
},
{
input: "uuu/M''''dd'T'HH:m:ss.SSS",
output: "2006/1''02T15:4:05.000",
err: nil,
},
{
input: "YYYY-MM-dd'T'H:mm:ss zzz",
output: "2006-01-02T15:04:05 MST",
err: nil,
},
{
input: "MMMM dd yyyy', 'HH:mm:ss.SSS",
output: "January 02 2006, 15:04:05.000",
err: nil,
},
{
input: "h 'o'''' clock' a, XXX",
output: "3 o' clock PM, Z07:00",
err: nil,
},
{
input: "YYYY-MM-dd'T'HH:mm:ss'Z'",
output: "2006-01-02T15:04:05Z",
err: nil,
},
{
input: "E MMM d H:mm:ss z Y",
output: "Mon Jan 2 15:04:05 MST 2006",
err: nil,
},
{
input: "E MMM DD H:m:s z Y",
output: "",
err: fmt.Errorf("invalid format string, unknown format specifier: DD"),
},
{
input: "E MMM''''' H:m:s z Y",
output: "",
err: fmt.Errorf("invalid format string, expected text literal delimiter: '"),
},
{
input: "MMMMM dd yyyy', 'HH:mm:ss.SSS",
output: "",
err: fmt.Errorf("invalid format string, unknown format specifier: MMMMM"),
},
}
for _, test := range tests {
out, err := parseISOString(test.input)
if err != nil && test.err == nil || err == nil && test.err != nil {
t.Fatalf("expected error %v, got error %v", test.err, err)
}
if out != test.output {
t.Fatalf("expected output %v, got %v", test.output, out)
}
}

}
Loading

0 comments on commit 1788a80

Please sign in to comment.