From 99e1d05101ebaa35e49021e3d84801b82355fb21 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 3 Oct 2024 17:13:18 +0200 Subject: [PATCH] Revise ISO parser, add common formats (#20) * prep common formats * update readme * draft new iso parser * add modifier for a, A, b, B directives, add j directive * update changelog --- CHANGELOG.md | 18 +++ README.md | 7 +- lib/Datetime.zig | 15 +- lib/Formats.zig | 5 + lib/string.zig | 341 +++++++++++++++++++++++++++--------------- tests/test_string.zig | 187 +++++++++++++++++++---- zdt.zig | 3 + 7 files changed, 418 insertions(+), 158 deletions(-) create mode 100644 lib/Formats.zig diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cdc5fa..8659113 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,24 @@ Types of changes ## Unreleased +## 2024-10-03, v0.3.2 + +### Added + +- ISO8601 parser: + - '-' as a year-month or month-day separator now optional + - ':' as an hour-minute or minute-second separator now optional + - capability to parse day-of-year (ordinal; 'yyyy-ooo' format) +- Datetime from string / Parser: + - 'j' directive to parse day-of-year +- Datetime to string / Formatter: + - 's' directive to get Unix time in seconds + - ':a', ':A', ':b', ':B' (modifier option) to get English day / month names, independent of locale + +### Changed + +- internal: ISO8601 parser revised + ## 2024-09-30, v0.3.1 ### Added diff --git a/README.md b/README.md index 34d381d..4f46d96 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,10 @@ **Datetime with Timezones in Zig.** Opinionated, and mostly for learning purposes. - [API overview](https://github.com/FObersteiner/zdt/wiki/API-overview) -- [Demo](https://github.com/FObersteiner/zdt/blob/master/examples/ex_demo.zig): +- [Roadmap](https://github.com/FObersteiner/zdt/wiki/Roadmap) +- Contributions: Welcome! 
+ +### [Demo](https://github.com/FObersteiner/zdt/blob/master/examples/ex_demo.zig) ```zig // need an allocator for the time zones since the size of the rule-files varies @@ -69,7 +72,7 @@ See [changelog](https://github.com/FObersteiner/zdt/blob/master/change.log) ## Zig version -This library is developed with Zig `0.14.0-dev` aka 'master', might not compile with older versions. As of 2024-09-20, Zig-0.13 stable or higher should work. +This library is developed with Zig `0.14.0-dev` aka 'master', might not compile with older versions. As of 2024-10-01, Zig-0.13 stable or higher should work. ## IANA timezone database version diff --git a/lib/Datetime.zig b/lib/Datetime.zig index 857cb77..9130df9 100644 --- a/lib/Datetime.zig +++ b/lib/Datetime.zig @@ -485,6 +485,10 @@ pub fn weekday(dt: Datetime) Weekday { return std.meta.intToEnum(Weekday, dt.weekdayNumber()) catch unreachable; } +pub fn monthEnum(dt: Datetime) Month { + return std.meta.intToEnum(Month, dt.month) catch unreachable; +} + /// Number of the weekday starting at 0 == Sunday (strftime/strptime: %w). pub fn weekdayNumber(dt: Datetime) u8 { const days = cal.dateToRD([3]u16{ dt.year, dt.month, dt.day }); @@ -497,10 +501,6 @@ pub fn weekdayIsoNumber(dt: Datetime) u8 { return cal.ISOweekdayFromUnixdays(days); } -pub fn monthEnum(dt: Datetime) Month { - return std.meta.intToEnum(Month, dt.month) catch unreachable; -} - /// Roll datetime forward to the specified next weekday. Makes a new datetime. pub fn nextWeekday(dt: Datetime, d: Weekday) Datetime { var daysdiff: i8 = 0; @@ -582,6 +582,13 @@ pub fn fromString(string: []const u8, directives: []const u8) !Datetime { /// Make a datetime from a string with an ISO8601-compatibel format. 
pub fn fromISO8601(string: []const u8) !Datetime { + // 9 digits of fractional seconds and hh:mm:ss UTC offset: 38 characters + if (string.len > 38) + return error.InvalidFormat; + // last character must be Z (UTC) or a digit + if (string[string.len - 1] != 'Z' and !std.ascii.isDigit(string[string.len - 1])) { + return error.InvalidFormat; + } var idx: usize = 0; // assume datetime starts at beginning of string return try Datetime.fromFields(try str.parseISO8601(string, &idx)); } diff --git a/lib/Formats.zig b/lib/Formats.zig new file mode 100644 index 0000000..68af808 --- /dev/null +++ b/lib/Formats.zig @@ -0,0 +1,5 @@ +//! common datetime formats + +pub const DateOnly = "%Y-%m-%d"; +pub const TimeOnly = "%H:%M:%S"; +pub const DateTime = "%Y-%m-%d %H:%M:%S"; diff --git a/lib/string.zig b/lib/string.zig index eaf2ada..01970b6 100644 --- a/lib/string.zig +++ b/lib/string.zig @@ -2,9 +2,12 @@ const builtin = @import("builtin"); const std = @import("std"); +const testing = std.testing; +const assert = std.debug.assert; const log = std.log.scoped(.zdt__stringIO); const Datetime = @import("./Datetime.zig"); +const cal = @import("./calendar.zig"); const Tz = @import("./Timezone.zig"); const ZdtError = @import("./errors.zig").ZdtError; const FormatError = @import("./errors.zig").FormatError; @@ -207,56 +210,67 @@ fn parseIntoFields( fields: *Datetime.Fields, string: []const u8, directive: u8, - idx: *usize, + idx_ptr: *usize, am_pm_flags: *u8, ) !void { switch (directive) { - 'd' => fields.day = try parseDigits(u8, string, idx, 2), + 'd' => fields.day = try parseDigits(u8, string, idx_ptr, 2), + // 'e' - use 'd' // 'a', // locale-specific, day name short // 'A', // locale-specific, day name - 'm' => fields.month = try parseDigits(u8, string, idx, 2), + 'm' => fields.month = try parseDigits(u8, string, idx_ptr, 2), // 'b', // locale-specific, month name short // 'B', // locale-specific, month name - 'Y' => fields.year = try parseDigits(u16, string, idx, 4), - 'y' => 
fields.year = try parseDigits(u16, string, idx, 2) + Datetime.century, + 'Y' => fields.year = try parseDigits(u16, string, idx_ptr, 4), + 'y' => fields.year = try parseDigits(u16, string, idx_ptr, 2) + Datetime.century, // 'C', - formatting-only // 'G', - 'H' => fields.hour = try parseDigits(u8, string, idx, 2), + 'H' => fields.hour = try parseDigits(u8, string, idx_ptr, 2), + // 'k' - use 'H' 'I' => fields.hour = blk: { // must be in [1..12] - const h = try parseDigits(u8, string, idx, 2); + const h = try parseDigits(u8, string, idx_ptr, 2); am_pm_flags.* |= 4; if (h >= 1 and h <= 12) break :blk h else return error.InvalidFormat; }, - 'P' => am_pm_flags.* |= try parseAmPm(string, idx), - 'p' => am_pm_flags.* |= try parseAmPm(string, idx), - 'M' => fields.minute = try parseDigits(u8, string, idx, 2), - 'S' => fields.second = try parseDigits(u8, string, idx, 2), + 'P' => am_pm_flags.* |= try parseAmPm(string, idx_ptr), + 'p' => am_pm_flags.* |= try parseAmPm(string, idx_ptr), + 'M' => fields.minute = try parseDigits(u8, string, idx_ptr, 2), + 'S' => fields.second = try parseDigits(u8, string, idx_ptr, 2), 'f' => { // if we only parse n digits out of 9, we have to multiply the result by // 10^n to get nanoseconds - const tmp_idx = idx.*; - fields.nanosecond = try parseDigits(u32, string, idx, 9); - const missing = 9 - (idx.* - tmp_idx); + const tmp_idx = idx_ptr.*; + fields.nanosecond = try parseDigits(u32, string, idx_ptr, 9); + const missing = 9 - (idx_ptr.* - tmp_idx); const f: u32 = std.math.powi(u32, 10, @as(u32, @intCast(missing))) catch return error.InvalidFraction; fields.nanosecond *= f; }, 'z' => { // UTC offset (+|-)hh[:mm[:ss]] or Z - const utcoffset = try parseOffset(i32, string, idx, 9); - if (string[idx.* - 1] == 'Z') + const utcoffset = try parseOffset(i32, string, idx_ptr, 9); + if (string[idx_ptr.* - 1] == 'Z') fields.tzinfo = Tz.UTC else fields.tzinfo = try Tz.fromOffset(utcoffset, ""); }, // 'Z', - ambiguous! 
// 'i', - IANA identifer; would require allocator - // 'j', + 'j' => { + const doy = try parseDigits(u16, string, idx_ptr, 3); + if (doy == 0) return error.InvalidFormat; + if (doy > 365 + @as(u16, @intFromBool(cal.isLeapYear(fields.year)))) return error.InvalidFormat; + const date = cal.rdToDate(cal.dateToRD([3]u16{ fields.year, 1, 1 }) + doy - 1); + fields.month = @truncate(date[1]); + fields.day = @truncate(date[2]); + }, // 'w', // 'u', - // 'U', // 'W', + // 'U', // 'V', - 'T' => fields.* = try parseISO8601(string, idx), + 'T' => { + fields.* = try parseISO8601(string, idx_ptr); + }, // 'x', // locale-specific, date // 'X', // locale-specific, time // 'c', // locale-specific, datetime @@ -274,11 +288,35 @@ fn printIntoWriter( switch (directive) { 'd' => try writer.print("{d:0>2}", .{dt.day}), 'e' => try writer.print("{d: >2}", .{dt.day}), - 'a' => try writer.print("{s}", .{std.mem.sliceTo(getDayNameAbbr(dt.weekdayNumber())[0..], 0)}), // locale-specific, day name short - 'A' => try writer.print("{s}", .{std.mem.sliceTo(getDayName(dt.weekdayNumber())[0..], 0)}), // locale-specific, day name + 'a' => { + switch (mod) { + 0 => try writer.print("{s}", .{std.mem.sliceTo(getDayNameAbbr(dt.weekdayNumber())[0..], 0)}), // locale-specific, day name short + 1 => try writer.print("{s}", .{dt.weekday().shortName()}), + else => return error.InvalidFormat, + } + }, + 'A' => { + switch (mod) { + 0 => try writer.print("{s}", .{std.mem.sliceTo(getDayName(dt.weekdayNumber())[0..], 0)}), // locale-specific, day name + 1 => try writer.print("{s}", .{dt.weekday().longName()}), + else => return error.InvalidFormat, + } + }, 'm' => try writer.print("{d:0>2}", .{dt.month}), - 'b' => try writer.print("{s}", .{std.mem.sliceTo(getMonthNameAbbr(dt.month - 1)[0..], 0)}), // locale-specific, month name short - 'B' => try writer.print("{s}", .{std.mem.sliceTo(getMonthName(dt.month - 1)[0..], 0)}), // locale-specific, month name + 'b' => { + switch (mod) { + 0 => try writer.print("{s}", 
.{std.mem.sliceTo(getMonthNameAbbr(dt.month - 1)[0..], 0)}), // locale-specific, month name short + 1 => try writer.print("{s}", .{dt.monthEnum().shortName()}), + else => return error.InvalidFormat, + } + }, + 'B' => { + switch (mod) { + 0 => try writer.print("{s}", .{std.mem.sliceTo(getMonthName(dt.month - 1)[0..], 0)}), // locale-specific, month name + 1 => try writer.print("{s}", .{dt.monthEnum().longName()}), + else => return error.InvalidFormat, + } + }, 'Y' => try writer.print("{d:0>4}", .{dt.year}), 'y' => try writer.print("{d:0>2}", .{dt.year % 100}), 'C' => try writer.print("{d:0>2}", .{dt.year / 100}), @@ -319,47 +357,47 @@ fn printIntoWriter( 'j' => try writer.print("{d:0>3}", .{dt.dayOfYear()}), 'w' => try writer.print("{d}", .{dt.weekdayNumber()}), 'u' => try writer.print("{d}", .{dt.weekdayIsoNumber()}), - 'U' => try writer.print("{d:0>2}", .{dt.weekOfYearSun()}), 'W' => try writer.print("{d:0>2}", .{dt.weekOfYearMon()}), + 'U' => try writer.print("{d:0>2}", .{dt.weekOfYearSun()}), 'V' => try writer.print("{d:0>2}", .{dt.toISOCalendar().isoweek}), 'T' => try dt.format("", .{}, writer), // 'x', // locale-specific, date // 'X', // locale-specific, time // 'c', // locale-specific, datetime - // 's' - Unix seconds + 's' => try writer.print("{d}", .{dt.__unix}), '%' => try writer.print("%", .{}), else => return error.InvalidDirective, } } -fn parseDigits(comptime T: type, string: []const u8, idx: *usize, maxDigits: usize) !T { - const start_idx = idx.*; +fn parseDigits(comptime T: type, string: []const u8, idx_ptr: *usize, maxDigits: usize) !T { + const start_idx = idx_ptr.*; if (!std.ascii.isDigit(string[start_idx])) return error.InvalidFormat; - idx.* += 1; - while (idx.* < string.len and // check first if string depleted - idx.* < start_idx + maxDigits and - std.ascii.isDigit(string[idx.*])) : (idx.* += 1) + idx_ptr.* += 1; + while (idx_ptr.* < string.len and // check first if string depleted + idx_ptr.* < start_idx + maxDigits and + 
std.ascii.isDigit(string[idx_ptr.*])) : (idx_ptr.* += 1) {} - return try std.fmt.parseInt(T, string[start_idx..idx.*], 10); + return try std.fmt.parseInt(T, string[start_idx..idx_ptr.*], 10); } // AM or PM string, no matter if upper or lower case. -fn parseAmPm(string: []const u8, idx: *usize) !u8 { - if (idx.* + 2 > string.len) return error.InvalidFormat; +fn parseAmPm(string: []const u8, idx_ptr: *usize) !u8 { + if (idx_ptr.* + 2 > string.len) return error.InvalidFormat; var flag: u8 = 0; - flag = switch (std.ascii.toLower(string[idx.*])) { + flag = switch (std.ascii.toLower(string[idx_ptr.*])) { 'a' => 1, 'p' => 2, else => return error.InvalidFormat, }; - idx.* += 1; - if (std.ascii.toLower(string[idx.*]) != 'm') return error.InvalidFormat; + idx_ptr.* += 1; + if (std.ascii.toLower(string[idx_ptr.*]) != 'm') return error.InvalidFormat; - idx.* += 1; + idx_ptr.* += 1; return flag; } @@ -369,30 +407,30 @@ fn twelve_hour_format(hour: u8) u8 { } // Offset UTC in the from of (+|-)hh[:mm[:ss]] or Z. 
-fn parseOffset(comptime T: type, string: []const u8, idx: *usize, maxDigits: usize) !T { - const start_idx = idx.*; +fn parseOffset(comptime T: type, string: []const u8, idx_ptr: *usize, maxDigits: usize) !T { + const start_idx = idx_ptr.*; var sign: i2 = 1; switch (string[start_idx]) { '+' => sign = 1, '-' => sign = -1, 'Z' => { - idx.* += 1; + idx_ptr.* += 1; return 0; }, else => return error.InvalidFormat, // must start with sign } - idx.* += 1; - while (idx.* < string.len and // check first if string depleted - idx.* < start_idx + maxDigits and - (std.ascii.isDigit(string[idx.*]) or string[idx.*] == ':')) : (idx.* += 1) + idx_ptr.* += 1; + while (idx_ptr.* < string.len and // check first if string depleted + idx_ptr.* < start_idx + maxDigits and + (std.ascii.isDigit(string[idx_ptr.*]) or string[idx_ptr.*] == ':')) : (idx_ptr.* += 1) {} // clean offset string: var index: usize = 0; var offset_chars = [6]u8{ 48, 48, 48, 48, 48, 48 }; // start with 000000; - for (string[start_idx + 1 .. idx.*]) |c| { // hhmmss + for (string[start_idx + 1 .. idx_ptr.*]) |c| { // hhmmss if (c != ':') { offset_chars[index] = c; index += 1; @@ -409,7 +447,20 @@ fn parseOffset(comptime T: type, string: []const u8, idx: *usize, maxDigits: usi return sign * (hours * 3600 + minutes * 60 + seconds); } -/// Parse ISO8601 formats. Format is infered at runtime. +const ISOParserState = enum(u8) { + Year, + Ordinal, + Month, + Day, + DateTimeSep, + Hour, + Minute, + Second, + Fraction, + Offset, +}; + +/// Parse ISO8601 formats. The format is infered at runtime. /// Requires at least a year and a month, separated by ASCII minus. /// Date and time separator is either 'T' or ASCII space. 
/// @@ -425,87 +476,120 @@ fn parseOffset(comptime T: type, string: []const u8, idx: *usize, maxDigits: usi /// 2014-08-23 12:15:56+01 22 2014-08-23T12:15:56+01:00 /// 2014-08-23T12:15:56-0530 24 2014-08-23T12:15:56-05:30 /// 2014-08-23T12:15:56+02:15:30 28 2014-08-23T12:15:56+02:15:30 -pub fn parseISO8601(string: []const u8, idx: *usize) !Datetime.Fields { - if (string.len > 38) // 9 digits of fractional seconds and hh:mm:ss UTC offset - return error.InvalidFormat; - if (string[string.len - 1] != 'Z' and !std.ascii.isDigit(string[string.len - 1])) { - return error.InvalidFormat; - } - if (string.len < 20) { - switch (string.len) { - 7, 10, 16, 19 => {}, - else => return error.InvalidFormat, - } - } - +pub fn parseISO8601(string: []const u8, idx_ptr: *usize) !Datetime.Fields { var fields = Datetime.Fields{}; var utcoffset: ?i32 = null; + var state: ISOParserState = .Year; + var check_idx: usize = idx_ptr.*; - // since this is a runtime-parser, we need to step through the input - // and stop doing so once we reach the end (break the 'parseblock') - parseblock: { - // yyyy-mm - fields.year = try parseDigits(u16, string, idx, 4); - if (idx.* != 4) return error.InvalidFormat; // 2-digit year not allowed - if (string[idx.*] != '-') return error.InvalidFormat; - idx.* += 1; - fields.month = try parseDigits(u8, string, idx, 2); - if (idx.* != 7) return error.InvalidFormat; // 1-digit month not allowed - if (idx.* == string.len) break :parseblock; - - // yyyy-mm-dd - if (string[idx.*] != '-') return error.InvalidFormat; - idx.* += 1; - fields.day = try parseDigits(u8, string, idx, 2); - if (idx.* != 10) return error.InvalidFormat; // 1-digit day not allowed - if (idx.* == string.len) break :parseblock; - - // yyyy-mm-ddTHH:MM - if (!(string[idx.*] == 'T' or string[idx.*] == ' ')) return error.InvalidFormat; - idx.* += 1; - fields.hour = try parseDigits(u8, string, idx, 2); - if (idx.* != 13) return error.InvalidFormat; // 1-digit hour not allowed - if (string[idx.*] != 
':') return error.InvalidFormat; - idx.* += 1; - fields.minute = try parseDigits(u8, string, idx, 2); - if (idx.* != 16) return error.InvalidFormat; // 1-digit minute not allowed - if (idx.* == string.len) break :parseblock; - - // yyyy-mm-ddTHH:MM:SS - seconds are optional - if (string[idx.*] == ':') { - idx.* += 1; - fields.second = try parseDigits(u8, string, idx, 2); - if (idx.* != 19) return error.InvalidFormat; // 1-digit second not allowed - if (idx.* == string.len) break :parseblock; - } - - // yyyy-mm-ddTHH:MM:SS[+-](offset or Z) - if (string[idx.*] == '+' or - string[idx.*] == '-' or - string[idx.*] == 'Z') - { - utcoffset = try parseOffset(i32, string, idx, 9); - if (idx.* == string.len) break :parseblock; - return error.InvalidFormat; // offset must not be followed by other fields + parsing: while (idx_ptr.* < string.len) { + switch (state) { + .Year => { + fields.year = try parseDigits(u16, string, idx_ptr, 4); + if (idx_ptr.* - check_idx != 4) return error.InvalidFormat; // assert 4 digit year + if (idx_ptr.* == string.len) return error.InvalidFormat; // year-only not allowed + if (string[idx_ptr.*] == '-') idx_ptr.* += 1; // opt. 
y-m separator + state = if (string[idx_ptr.*..].len == 3) .Ordinal else .Month; + continue :parsing; + }, + .Ordinal => { + check_idx = idx_ptr.*; + const doy = try parseDigits(u16, string, idx_ptr, 3); + if (idx_ptr.* - check_idx != 3) return error.InvalidFormat; // assert 3 digit ordinal + if (doy == 0) return error.InvalidFormat; + if (doy > 365 + @as(u16, @intFromBool(cal.isLeapYear(fields.year)))) return error.InvalidFormat; + const date = cal.rdToDate(cal.dateToRD([3]u16{ fields.year, 1, 1 }) + doy - 1); + fields.month = @truncate(date[1]); + fields.day = @truncate(date[2]); + break :parsing; + }, + .Month => { + check_idx = idx_ptr.*; + fields.month = try parseDigits(u8, string, idx_ptr, 2); + if (idx_ptr.* - check_idx != 2) return error.InvalidFormat; // assert 2 digit month + state = .Day; + continue :parsing; + }, + .Day => { + if (string[idx_ptr.*] == '-') idx_ptr.* += 1; // opt. m-d separator + check_idx = idx_ptr.*; + fields.day = try parseDigits(u8, string, idx_ptr, 2); + if (idx_ptr.* - check_idx != 2) return error.InvalidFormat; // assert 2 digit day + state = .DateTimeSep; + continue :parsing; + }, + .DateTimeSep => { + if (!(string[idx_ptr.*] == 'T' or string[idx_ptr.*] == ' ')) { + return error.InvalidFormat; + } + idx_ptr.* += 1; + state = .Hour; + continue :parsing; + }, + .Hour => { + check_idx = idx_ptr.*; + fields.hour = try parseDigits(u8, string, idx_ptr, 2); + if (idx_ptr.* - check_idx != 2) return error.InvalidFormat; // assert 2 digit hour + state = .Minute; + continue :parsing; + }, + .Minute => { + if (string[idx_ptr.*] == ':') idx_ptr.* += 1; // opt. 
h:m separator + check_idx = idx_ptr.*; + fields.minute = try parseDigits(u8, string, idx_ptr, 2); + if (idx_ptr.* - check_idx != 2) return error.InvalidFormat; // assert 2 digit minute + // next might be offset, but not fraction + if (peekChar(string, idx_ptr)) |c| { + if (c == '+' or c == '-' or c == 'Z') { + state = .Offset; + continue :parsing; + } + } + state = .Second; + continue :parsing; + }, + .Second => { + if (string[idx_ptr.*] == ':') idx_ptr.* += 1; // opt. m:s separator + check_idx = idx_ptr.*; + fields.second = try parseDigits(u8, string, idx_ptr, 2); + if (idx_ptr.* - check_idx != 2) return error.InvalidFormat; // assert 2 digit second + // next might be offset or fraction + if (peekChar(string, idx_ptr)) |c| { + if (c == '+' or c == '-' or c == 'Z') { + state = .Offset; + continue :parsing; + } + if (c == '.' or c == ',') { + idx_ptr.* += 1; + state = .Fraction; + continue :parsing; + } + } + break :parsing; + }, + .Fraction => { + const tmp_idx = idx_ptr.*; + fields.nanosecond = try parseDigits(u32, string, idx_ptr, 9); + const missing = 9 - (idx_ptr.* - tmp_idx); + const f: u32 = std.math.powi(u32, 10, @as(u32, @intCast(missing))) catch return error.InvalidFraction; + fields.nanosecond *= f; + if (peekChar(string, idx_ptr)) |c| { + if (c == '+' or c == '-' or c == 'Z') { + state = .Offset; + continue :parsing; + } + } + break :parsing; + }, + .Offset => { + utcoffset = try parseOffset(i32, string, idx_ptr, 9); + break :parsing; + }, } - - // yyyy-mm-ddTHH:MM:SS.fff (fractional seconds separator can either be '.' or ',') - if (!(string[idx.*] == '.' 
or string[idx.*] == ',')) return error.InvalidFormat; - idx.* += 1; - // parse any number of fractional seconds up to 9 - const tmp_idx = idx.*; - fields.nanosecond = try parseDigits(u32, string, idx, 9); - const missing = 9 - (idx.* - tmp_idx); - const f: u32 = std.math.powi(u32, 10, @as(u32, @intCast(missing))) catch return error.InvalidFraction; - fields.nanosecond *= f; - if (idx.* == string.len) break :parseblock; - - // trailing UTC offset - utcoffset = try parseOffset(i32, string, idx, 9); } if (utcoffset != null) { - if (string[idx.* - 1] == 'Z') + if (string[idx_ptr.* - 1] == 'Z') fields.tzinfo = Tz.UTC else fields.tzinfo = try Tz.fromOffset(utcoffset.?, ""); @@ -514,6 +598,21 @@ pub fn parseISO8601(string: []const u8, idx: *usize) !Datetime.Fields { return fields; } +fn peekChar(string: []const u8, idx_ptr: *usize) ?u8 { + if (idx_ptr.* >= string.len) return null; + return string[idx_ptr.*]; +} + +test "peek" { + const string: []const u8 = "text"; + var idx: usize = 3; + var peek = peekChar(string, &idx); + try testing.expectEqual(peek.?, 't'); + idx = 4; + peek = peekChar(string, &idx); + try testing.expectEqual(peek, null); +} + // ----- // helpers for %a %A %b %B // -----> diff --git a/tests/test_string.zig b/tests/test_string.zig index 3df0dfb..5000780 100644 --- a/tests/test_string.zig +++ b/tests/test_string.zig @@ -13,6 +13,7 @@ const time_mask = switch (builtin.os.tag) { const zdt = @import("zdt"); const Datetime = zdt.Datetime; +const Formats = zdt.Formats; const td = zdt.Duration; const Tz = zdt.Timezone; @@ -63,6 +64,12 @@ test "format naive datetimes with format string api" { buf.clearAndFree(); try case.dt.toString("%Y-%m-%d %H:%M:%S", buf.writer()); try testing.expectEqualStrings(case.string, buf.items); + buf.clearAndFree(); + try case.dt.toString(Formats.DateOnly ++ " " ++ Formats.TimeOnly, buf.writer()); + try testing.expectEqualStrings(case.string, buf.items); + buf.clearAndFree(); + try case.dt.toString(Formats.DateTime, buf.writer()); 
+ try testing.expectEqualStrings(case.string, buf.items); } } @@ -255,7 +262,7 @@ test "format with Z" { try testing.expectEqualStrings(string_dst, s_dst.items); } -test "format with abbreviated day name" { +test "format with abbreviated day name, locale-specific" { if (!locale_ok()) return error.SkipZigTest; var buf = std.ArrayList(u8).init(testing.allocator); @@ -270,7 +277,17 @@ test "format with abbreviated day name" { try testing.expectEqualStrings(string, buf.items); } -test "format with day name" { +test "format with abbreviated day name, enforce English" { + var buf = std.ArrayList(u8).init(testing.allocator); + defer buf.deinit(); + const dt = Datetime.epoch; + const string = "Thu"; + const directive = "%:a"; + try dt.toString(directive, buf.writer()); + try testing.expectEqualStrings(string, buf.items); +} + +test "format with day name, locale-specific" { if (!locale_ok()) return error.SkipZigTest; var buf = std.ArrayList(u8).init(testing.allocator); @@ -285,7 +302,17 @@ test "format with day name" { try testing.expectEqualStrings(string, buf.items); } -test "format with abbreviated month name" { +test "format with day name, enforce English" { + var buf = std.ArrayList(u8).init(testing.allocator); + defer buf.deinit(); + const dt = Datetime.epoch; + const string = "Thursday"; + const directive = "%:A"; + try dt.toString(directive, buf.writer()); + try testing.expectEqualStrings(string, buf.items); +} + +test "format with abbreviated month name, locale-specific" { if (!locale_ok()) return error.SkipZigTest; var buf = std.ArrayList(u8).init(testing.allocator); @@ -300,7 +327,17 @@ test "format with abbreviated month name" { try testing.expectEqualStrings(string, buf.items); } -test "format with month name" { +test "format with abbreviated month name, enforce English" { + var buf = std.ArrayList(u8).init(testing.allocator); + defer buf.deinit(); + const dt = Datetime.epoch; + const string = "Jan"; + const directive = "%:b"; + try dt.toString(directive, 
buf.writer()); + try testing.expectEqualStrings(string, buf.items); +} + +test "format with month name, locale-specific" { if (!locale_ok()) return error.SkipZigTest; var buf = std.ArrayList(u8).init(testing.allocator); @@ -315,6 +352,16 @@ test "format with month name" { try testing.expectEqualStrings(string, buf.items); } +test "format with month name, enforce English" { + var buf = std.ArrayList(u8).init(testing.allocator); + defer buf.deinit(); + const dt = Datetime.epoch; + const string = "January"; + const directive = "%:B"; + try dt.toString(directive, buf.writer()); + try testing.expectEqualStrings(string, buf.items); +} + test "format with 12 hour clock" { const HourTestCase = struct { hour: u8, @@ -445,6 +492,30 @@ test "format with 2-digit century" { } } +test "format with %s to get Unix time in seconds" { + const cases = [_]TestCase{ + .{ + .dt = try Datetime.fromFields(.{ .year = 1970, .month = 1, .day = 1 }), + .string = "0", + }, + .{ + .dt = try Datetime.fromFields(.{ .year = 1691, .month = 9, .day = 15 }), + .string = "-8782128000", + }, + .{ + .dt = try Datetime.fromFields(.{ .year = 2024, .month = 9, .day = 21 }), + .string = "1726876800", + }, + }; + + inline for (cases) |case| { + var buf = std.ArrayList(u8).init(testing.allocator); + try case.dt.toString("%s", buf.writer()); + try testing.expectEqualStrings(case.string, buf.items); + buf.deinit(); + } +} + // ---- String to Datetime ---- test "comptime parse with comptime format string #1" { @@ -532,24 +603,6 @@ test "parse %I and am/pm errors" { try testing.expectError(error.InvalidFormat, err); } -test "comptime parse ISO " { - const cases = [_]TestCase{ - .{ - .string = "2021-02-18T17:00:00.1", - .dt = try Datetime.fromFields(.{ .year = 2021, .month = 2, .day = 18, .hour = 17, .nanosecond = 100_000_000 }), - }, - .{ - .string = "2021-02-18T17:00:00.123456", - .dt = try Datetime.fromFields(.{ .year = 2021, .month = 2, .day = 18, .hour = 17, .nanosecond = 123_456_000 }), - }, - }; - - inline 
for (cases) |case| { - const dt = try Datetime.fromString(case.string, "%T"); - try testing.expectEqual(case.dt, dt); - } -} - test "comptime parse with fractional part" { const cases = [_]TestCase{ .{ @@ -614,6 +667,31 @@ test "parse single digits" { } } +test "parse day of year with %j" { + const cases = [_]TestCase{ + .{ + .string = "2014-082", + .directive = "%Y-%j", + .dt = try Datetime.fromFields(.{ .year = 2014, .month = 3, .day = 23 }), + }, + .{ + .string = "2014082", + .directive = "%Y%j", + .dt = try Datetime.fromFields(.{ .year = 2014, .month = 3, .day = 23 }), + }, + .{ + .string = "2024366", + .directive = "%Y%j", + .dt = try Datetime.fromFields(.{ .year = 2024, .month = 12, .day = 31 }), + }, + }; + + inline for (cases) |case| { + const dt = try Datetime.fromString(case.string, case.directive); + try testing.expectEqual(case.dt, dt); + } +} + test "parsing directives do not match fields in string" { var err = Datetime.fromString("1970-01-01 00:00:00", "%Y-%m-%d %H%%%M%%%S"); try testing.expectError(error.InvalidFormat, err); @@ -763,19 +841,27 @@ test "parse ISO" { var dt_ref = try Datetime.fromFields(.{ .year = 2014, .month = 8 }); var dt = try Datetime.fromISO8601("2014-08"); try testing.expect(std.meta.eql(dt_ref, dt)); - // TODO : - // dt = try Datetime.fromISO8601("201408"); - // try testing.expect(std.meta.eql(dt_ref, dt)); + + dt = try Datetime.fromISO8601("201408"); + try testing.expect(std.meta.eql(dt_ref, dt)); dt_ref = try Datetime.fromFields(.{ .year = 2014, .month = 8, .day = 23 }); dt = try Datetime.fromISO8601("2014-08-23"); try testing.expect(std.meta.eql(dt_ref, dt)); - // TODO : - // dt = try Datetime.fromISO8601("20140823"); - // try testing.expect(std.meta.eql(dt_ref, dt)); - // TODO : - // year-doy + dt = try Datetime.fromISO8601("20140823"); + try testing.expect(std.meta.eql(dt_ref, dt)); + + dt_ref = try Datetime.fromFields(.{ .year = 2014, .month = 3, .day = 23 }); + dt = try Datetime.fromISO8601("2014-082"); + try 
testing.expect(std.meta.eql(dt_ref, dt)); + + dt = try Datetime.fromISO8601("2014082"); + try testing.expect(std.meta.eql(dt_ref, dt)); + + dt_ref = try Datetime.fromFields(.{ .year = 2024, .month = 12, .day = 31 }); + dt = try Datetime.fromISO8601("2024-366"); + try testing.expect(std.meta.eql(dt_ref, dt)); dt_ref = try Datetime.fromFields(.{ .year = 2014, .month = 8, .day = 23, .hour = 12, .minute = 15 }); dt = try Datetime.fromISO8601("2014-08-23 12:15"); @@ -833,8 +919,44 @@ test "parse ISO" { try testing.expect(std.meta.eql(dt_ref, dt)); } +test "comptime parse ISO with %T" { + const cases = [_]TestCase{ + .{ + .string = "2021-02-18T17:00:00.1", + .directive = "%T", + .dt = try Datetime.fromFields(.{ .year = 2021, .month = 2, .day = 18, .hour = 17, .nanosecond = 100_000_000 }), + }, + .{ + .string = "2021-02-18T17:00:00.123456", + .directive = "%T", + .dt = try Datetime.fromFields(.{ .year = 2021, .month = 2, .day = 18, .hour = 17, .nanosecond = 123_456_000 }), + }, + .{ + .string = "text 2021-02-18T17:00:00.123456", + .directive = "text %T", + .dt = try Datetime.fromFields(.{ .year = 2021, .month = 2, .day = 18, .hour = 17, .nanosecond = 123_456_000 }), + }, + .{ + .string = "text 2021-02-18T17:00:00.123456 more text", + .directive = "text %T more text", + .dt = try Datetime.fromFields(.{ .year = 2021, .month = 2, .day = 18, .hour = 17, .nanosecond = 123_456_000 }), + }, + }; + + inline for (cases) |case| { + const dt = try Datetime.fromString(case.string, case.directive); + try testing.expectEqual(case.dt, dt); + } +} + test "not ISO8601" { - var err = Datetime.fromISO8601("2014-08-23T12:15:56+-0200"); // invalid offset + var err = Datetime.fromISO8601("2014000"); + try testing.expectError(error.InvalidFormat, err); + + err = Datetime.fromISO8601("2014366"); // ordinal invald: 2014 is not a leap year + try testing.expectError(error.InvalidFormat, err); + + err = Datetime.fromISO8601("2024367"); // ordinal invald: 2024 is a leap year but this has 366 days try 
testing.expectError(error.InvalidFormat, err); err = Datetime.fromISO8601("2014"); // year-only not allowed @@ -866,4 +988,7 @@ test "not ISO8601" { err = Datetime.fromISO8601("2014-02-03T23:00:00..314"); // invlid fractional secs separator try testing.expectError(error.InvalidFormat, err); + + err = Datetime.fromISO8601("2014-08-23T12:15:56+-0200"); // invalid offset + try testing.expectError(error.InvalidFormat, err); } diff --git a/zdt.zig b/zdt.zig index 53adb93..8f92c51 100644 --- a/zdt.zig +++ b/zdt.zig @@ -5,6 +5,7 @@ const std = @import("std"); const log = std.log.scoped(.zdt__root); pub const Datetime = @import("./lib/Datetime.zig"); +pub const Formats = @import("./lib/Formats.zig"); pub const Timezone = @import("./lib/Timezone.zig"); pub const Duration = @import("./lib/Duration.zig"); @@ -14,8 +15,10 @@ pub const ZdtError = @import("./lib/errors.zig").ZdtError; const calendar = @import("./lib/calendar.zig"); const string = @import("./lib/string.zig"); const tzif = @import("./lib/tzif.zig"); + test { _ = Datetime; + _ = Formats; _ = Timezone; _ = Duration; _ = calendar;