diff --git a/Cargo.lock b/Cargo.lock index 915b93a3..718d620c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1178,6 +1178,7 @@ dependencies = [ "indextree", "indicatif", "itertools", + "jiff", "kdl", "log", "logos", @@ -1493,6 +1494,31 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jiff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a45489186a6123c128fdf6016183fcfab7113e1820eb813127e036e287233fb" +dependencies = [ + "jiff-tzdb-platform", + "windows-sys 0.59.0", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91335e575850c5c4c673b9bd467b0e025f164ca59d0564f69d0c2ee0ffad4653" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9835f0060a626fe59f160437bc725491a6af23133ea906500027d1bd2f8f4329" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "jobserver" version = "0.1.32" diff --git a/hipcheck/Cargo.toml b/hipcheck/Cargo.toml index d8a98048..121af774 100644 --- a/hipcheck/Cargo.toml +++ b/hipcheck/Cargo.toml @@ -71,6 +71,7 @@ indexmap = "2.5.0" indextree = "4.6.1" indicatif = { version = "0.17.8", features = ["rayon"] } itertools = "0.13.0" +jiff = "0.1.13" kdl = "4.6.0" log = "0.4.22" logos = "0.14.0" diff --git a/hipcheck/src/policy_exprs/env.rs b/hipcheck/src/policy_exprs/env.rs index 98605cd9..f16d7c88 100644 --- a/hipcheck/src/policy_exprs/env.rs +++ b/hipcheck/src/policy_exprs/env.rs @@ -2,6 +2,7 @@ use crate::policy_exprs::{eval, Error, Expr, Ident, Primitive, Result, F64}; use itertools::Itertools as _; +use jiff::{Span, Zoned}; use std::{cmp::Ordering, collections::HashMap, ops::Not as _}; use Expr::*; use Primitive::*; @@ -228,6 +229,12 @@ enum ArrayType { /// An array of bools. 
Bool(Vec), + /// An array of datetimes. + DateTime(Vec), + + /// An array of time spans. + Span(Vec), + /// An empty array (no type hints). Empty, } @@ -272,6 +279,29 @@ fn array_type(arr: &[Primitive]) -> Result { } Ok(ArrayType::Bool(result)) } + DateTime(_) => { + let mut result: Vec = Vec::with_capacity(arr.len()); + for elem in arr { + if let DateTime(val) = elem { + result.push(val.clone()); + } else { + return Err(Error::InconsistentArrayTypes); + } + } + Ok(ArrayType::DateTime(result)) + } + Span(_) => { + let mut result: Vec = Vec::with_capacity(arr.len()); + for elem in arr { + if let Span(val) = elem { + result.push(*val); + } else { + return Err(Error::InconsistentArrayTypes); + } + } + Ok(ArrayType::Span(result)) + } + Identifier(_) => unimplemented!("we don't currently support idents in arrays"), } } @@ -455,6 +485,8 @@ fn not(env: &Env, args: &[Expr]) -> Result { Int(_) => Err(Error::BadType(name)), Float(_) => Err(Error::BadType(name)), Bool(arg) => Ok(Primitive::Bool(arg.not())), + DateTime(_) => Err(Error::BadType(name)), + Span(_) => Err(Error::BadType(name)), Identifier(_) => unreachable!("no idents should be here"), }; @@ -480,6 +512,8 @@ fn max(env: &Env, args: &[Expr]) -> Result { .map(|m| Primitive(Float(m))), ArrayType::Bool(_) => Err(Error::BadType(name)), + ArrayType::DateTime(_) => Err(Error::BadType(name)), + ArrayType::Span(_) => Err(Error::BadType(name)), ArrayType::Empty => Err(Error::NoMax), }; @@ -505,6 +539,8 @@ fn min(env: &Env, args: &[Expr]) -> Result { .map(|m| Primitive(Float(m))), ArrayType::Bool(_) => Err(Error::BadType(name)), + ArrayType::DateTime(_) => Err(Error::BadType(name)), + ArrayType::Span(_) => Err(Error::BadType(name)), ArrayType::Empty => Err(Error::NoMin), }; @@ -528,6 +564,8 @@ fn avg(env: &Env, args: &[Expr]) -> Result { } ArrayType::Bool(_) => Err(Error::BadType(name)), + ArrayType::DateTime(_) => Err(Error::BadType(name)), + ArrayType::Span(_) => Err(Error::BadType(name)), ArrayType::Empty => 
Err(Error::NoAvg), }; @@ -549,6 +587,8 @@ fn median(env: &Env, args: &[Expr]) -> Result { Ok(Primitive(Float(floats[mid]))) } ArrayType::Bool(_) => Err(Error::BadType(name)), + ArrayType::DateTime(_) => Err(Error::BadType(name)), + ArrayType::Span(_) => Err(Error::BadType(name)), ArrayType::Empty => Err(Error::NoMedian), }; @@ -562,6 +602,8 @@ fn count(env: &Env, args: &[Expr]) -> Result { ArrayType::Int(ints) => Ok(Primitive(Int(ints.len() as i64))), ArrayType::Float(floats) => Ok(Primitive(Int(floats.len() as i64))), ArrayType::Bool(bools) => Ok(Primitive(Int(bools.len() as i64))), + ArrayType::DateTime(dts) => Ok(Primitive(Int(dts.len() as i64))), + ArrayType::Span(spans) => Ok(Primitive(Int(spans.len() as i64))), ArrayType::Empty => Ok(Primitive(Int(0))), }; @@ -591,6 +633,18 @@ fn all(env: &Env, args: &[Expr]) -> Result { .process_results(|mut iter| { iter.all(|expr| matches!(expr, Primitive(Bool(true)))) })?, + ArrayType::DateTime(dts) => dts + .iter() + .map(|val| eval_lambda(env, &ident, DateTime(val.clone()), (*body).clone())) + .process_results(|mut iter| { + iter.all(|expr| matches!(expr, Primitive(Bool(true)))) + })?, + ArrayType::Span(spans) => spans + .iter() + .map(|val| eval_lambda(env, &ident, Span(*val), (*body).clone())) + .process_results(|mut iter| { + iter.all(|expr| matches!(expr, Primitive(Bool(true)))) + })?, ArrayType::Empty => true, }; @@ -623,6 +677,18 @@ fn nall(env: &Env, args: &[Expr]) -> Result { .process_results(|mut iter| { iter.all(|expr| matches!(expr, Primitive(Bool(true)))).not() })?, + ArrayType::DateTime(dts) => dts + .iter() + .map(|val| eval_lambda(env, &ident, DateTime(val.clone()), (*body).clone())) + .process_results(|mut iter| { + iter.all(|expr| matches!(expr, Primitive(Bool(true)))).not() + })?, + ArrayType::Span(spans) => spans + .iter() + .map(|val| eval_lambda(env, &ident, Span(*val), (*body).clone())) + .process_results(|mut iter| { + iter.all(|expr| matches!(expr, Primitive(Bool(true)))).not() + })?, 
ArrayType::Empty => false, }; @@ -655,6 +721,18 @@ fn some(env: &Env, args: &[Expr]) -> Result { .process_results(|mut iter| { iter.any(|expr| matches!(expr, Primitive(Bool(true)))) })?, + ArrayType::DateTime(dts) => dts + .iter() + .map(|val| eval_lambda(env, &ident, DateTime(val.clone()), (*body).clone())) + .process_results(|mut iter| { + iter.any(|expr| matches!(expr, Primitive(Bool(true)))) + })?, + ArrayType::Span(spans) => spans + .iter() + .map(|val| eval_lambda(env, &ident, Span(*val), (*body).clone())) + .process_results(|mut iter| { + iter.any(|expr| matches!(expr, Primitive(Bool(true)))) + })?, ArrayType::Empty => false, }; @@ -687,6 +765,18 @@ fn none(env: &Env, args: &[Expr]) -> Result { .process_results(|mut iter| { iter.any(|expr| matches!(expr, Primitive(Bool(true)))).not() })?, + ArrayType::DateTime(dts) => dts + .iter() + .map(|val| eval_lambda(env, &ident, DateTime(val.clone()), (*body).clone())) + .process_results(|mut iter| { + iter.any(|expr| matches!(expr, Primitive(Bool(true)))).not() + })?, + ArrayType::Span(spans) => spans + .iter() + .map(|val| eval_lambda(env, &ident, Span(*val), (*body).clone())) + .process_results(|mut iter| { + iter.any(|expr| matches!(expr, Primitive(Bool(true)))).not() + })?, ArrayType::Empty => true, }; @@ -734,6 +824,33 @@ fn filter(env: &Env, args: &[Expr]) -> Result { } }) .collect::>>()?, + ArrayType::DateTime(dts) => dts + .iter() + .map(|val| { + Ok(( + val, + eval_lambda(env, &ident, DateTime(val.clone()), (*body).clone()), + )) + }) + .filter_map_ok(|(val, expr)| { + if let Ok(Primitive(Bool(true))) = expr { + Some(Primitive::DateTime(val.clone())) + } else { + None + } + }) + .collect::>>()?, + ArrayType::Span(spans) => spans + .iter() + .map(|val| Ok((val, eval_lambda(env, &ident, Span(*val), (*body).clone())))) + .filter_map_ok(|(val, expr)| { + if let Ok(Primitive(Bool(true))) = expr { + Some(Primitive::Span(*val)) + } else { + None + } + }) + .collect::>>()?, ArrayType::Empty => Vec::new(), }; @@ 
-775,6 +892,24 @@ fn foreach(env: &Env, args: &[Expr]) -> Result { Err(err) => Err(err), }) .collect::>>()?, + ArrayType::DateTime(dts) => dts + .iter() + .map(|val| eval_lambda(env, &ident, DateTime(val.clone()), (*body).clone())) + .map(|expr| match expr { + Ok(Primitive(inner)) => Ok(inner), + Ok(_) => Err(Error::BadType(name)), + Err(err) => Err(err), + }) + .collect::>>()?, + ArrayType::Span(spans) => spans + .iter() + .map(|val| eval_lambda(env, &ident, Span(*val), (*body).clone())) + .map(|expr| match expr { + Ok(Primitive(inner)) => Ok(inner), + Ok(_) => Err(Error::BadType(name)), + Err(err) => Err(err), + }) + .collect::>>()?, ArrayType::Empty => Vec::new(), }; diff --git a/hipcheck/src/policy_exprs/error.rs b/hipcheck/src/policy_exprs/error.rs index 80b4c5ad..61602192 100644 --- a/hipcheck/src/policy_exprs/error.rs +++ b/hipcheck/src/policy_exprs/error.rs @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 use crate::policy_exprs::{Expr, Ident, LexingError}; +use jiff::Error as JError; use nom::{error::ErrorKind, Needed}; use ordered_float::FloatIsNan; +use std::fmt; /// `Result` which uses [`Error`]. 
pub type Result = std::result::Result; @@ -126,6 +128,26 @@ pub enum UnrepresentableJSONType { JSONNull, } +// Custom error to handle jiff's native error not impl PartialEq +// We exploit the fact that it *does* impl Display +#[derive(Clone, Debug, thiserror::Error, PartialEq)] +pub struct JiffError { + jiff_error: String, +} + +impl JiffError { + pub fn new(err: JError) -> Self { + let msg = err.to_string(); + JiffError { jiff_error: msg } + } +} + +impl fmt::Display for JiffError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.jiff_error) + } +} + fn needed_str(needed: &Needed) -> String { match needed { Needed::Unknown => String::from(""), diff --git a/hipcheck/src/policy_exprs/expr.rs b/hipcheck/src/policy_exprs/expr.rs index b1e00740..1e175a30 100644 --- a/hipcheck/src/policy_exprs/expr.rs +++ b/hipcheck/src/policy_exprs/expr.rs @@ -6,6 +6,7 @@ use crate::policy_exprs::{ Error, Result, Tokens, }; use itertools::Itertools; +use jiff::{Span, Zoned}; use nom::{ branch::alt, combinator::{all_consuming, map}, @@ -49,6 +50,21 @@ pub enum Primitive { /// Boolean. Bool(bool), + + /// Date-time value with timezone information using the [jiff] crate, which uses a modified version of ISO8601. + /// This must include a date in the format <YYYY>-<MM>-<DD>
. + /// An optional time in the format T<HH>:[MM]:[SS] will be accepted after the date. + /// Decimal fractions of hours and minutes are not allowed; use smaller time units instead (e.g. T10:30 instead of T10.5). Decimal fractions of seconds are allowed. + /// The timezone is always set to UTC, but you can set an offset from UTC by including +{HH}:[MM] or -{HH}:[MM]. The time will be adjusted to the correct UTC time during parsing. + DateTime(Zoned), + + /// Span of time using the [jiff] crate, which uses a modified version of ISO8601. + /// Can include years, months, weeks, days, hours, minutes, and seconds (including decimal fractions of a second). + /// Spans are preceded by the letter "P" with any optional time units separated from optional date units by the letter "T". + /// All units of dates and times are represented by single case-agnostic letter abbreviations after the number. + /// For example, a span of one year, one month, one week, one day, one hour, one minute, and one-and-a-tenth seconds would be represented as + /// "P1y1m1w1dT1h1m1.1s" + Span(Span), } /// A variable or function identifier. @@ -89,6 +105,8 @@ impl Display for Primitive { Primitive::Int(i) => write!(f, "{}", i), Primitive::Float(fl) => write!(f, "{}", fl), Primitive::Bool(b) => write!(f, "{}", if *b { "#t" } else { "#f" }), + Primitive::DateTime(dt) => write!(f, "{}", dt), + Primitive::Span(span) => write!(f, "{}", span), } } } @@ -141,6 +159,16 @@ crate::data_variant_parser! { pattern = Token::Bool(b) => Primitive::Bool(b); } +crate::data_variant_parser! { + fn parse_datetime(input) -> Result; + pattern = Token::DateTime(dt) => Primitive::DateTime(*dt); +} + +crate::data_variant_parser! { + fn parse_span(input) -> Result; + pattern = Token::Span(span) => Primitive::Span(*span); +} + crate::data_variant_parser! 
{ fn parse_ident(input) -> Result; pattern = Token::Ident(s) => s.to_owned(); @@ -156,7 +184,13 @@ pub type Input<'source> = Tokens<'source, Token>; /// Parse a single piece of primitive data. fn parse_primitive(input: Input<'_>) -> IResult, Primitive> { - alt((parse_integer, parse_float, parse_bool))(input) + alt(( + parse_integer, + parse_float, + parse_bool, + parse_datetime, + parse_span, + ))(input) } /// Parse an array. diff --git a/hipcheck/src/policy_exprs/token.rs b/hipcheck/src/policy_exprs/token.rs index f42d6ac2..d7f98d29 100644 --- a/hipcheck/src/policy_exprs/token.rs +++ b/hipcheck/src/policy_exprs/token.rs @@ -1,6 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 +use crate::policy_exprs::error::JiffError; use crate::policy_exprs::F64; +use git2::Time; +use jiff::{ + civil::{Date, DateTime}, + tz::{self, TimeZone}, + Span, Timestamp, Zoned, +}; use logos::{Lexer, Logos}; use ordered_float::FloatIsNan; use std::{ @@ -34,7 +41,16 @@ pub enum Token { #[regex(r"([1-9]?[0-9]*)", lex_integer, priority = 20)] Integer(i64), - #[regex("([a-zA-Z]+)", lex_ident)] + #[regex(r"[0-9]{3,4}-[^\s\)]+", lex_datetime)] + DateTime(Box), + + // In the future this regex *could* be made more specific to reduce collision + // with Ident, or we could introduce a special prefix character like '@' or '#' + #[regex(r"PT?[0-9]+[a-zA-Z][^\s\)]*", lex_span)] + Span(Box), + + // Prioritize over span regex, which starts with a 'P' + #[regex(r"([a-zA-Z]+)", lex_ident, priority = 10)] Ident(String), #[regex(r"\$[/~_[:alnum:]]*", lex_json_pointer)] @@ -68,6 +84,42 @@ fn lex_float(input: &mut Lexer<'_, Token>) -> Result { Ok(F64::new(f)?) } +/// Lex a single datetime value. 
+fn lex_datetime(input: &mut Lexer<'_, Token>) -> Result> { + let s = input.slice(); + // Parse to a Zoned datetime value with as much detail as given + // If a UTC offset is provided, convert the datetime to the equivalent UTC datetime + if let Ok(timestamp) = s.parse::() { + Ok(Box::new(timestamp.to_zoned(TimeZone::UTC))) + // If no offset is provided, assume the time is UTC + } else if let Ok(dt) = s.parse::() { + dt.to_zoned(TimeZone::UTC) + .map_err(|err| LexingError::InvalidDatetime(s.to_string(), JiffError::new(err))) + .map(Box::new) + } else { + match s.parse::() { + // If no time is provided, treat the time as midnight UTC on the given day + Ok(date) => date + .to_zoned(TimeZone::UTC) + .map_err(|err| LexingError::InvalidDatetime(s.to_string(), JiffError::new(err))) + .map(Box::new), + // If the string provided does not parse to a valid date or datetime, return an error + Err(err) => Err(LexingError::InvalidDatetime( + s.to_string(), + JiffError::new(err), + )), + } + } +} + +/// Lex a time span +fn lex_span(input: &mut Lexer<'_, Token>) -> Result> { + let s = input.slice(); + s.parse::() + .map_err(|err| LexingError::InvalidSpan(s.to_string(), JiffError::new(err))) + .map(Box::new) +} + /// Lex a single identifier. fn lex_ident(input: &mut Lexer<'_, Token>) -> Result { Ok(input.slice().to_owned()) @@ -102,6 +154,8 @@ impl Display for Token { Token::Bool(false) => write!(f, "#f"), Token::Integer(i) => write!(f, "{i}"), Token::Float(fl) => write!(f, "{fl}"), + Token::DateTime(dt) => write!(f, "{dt}"), + Token::Span(span) => write!(f, "{span}"), Token::Ident(i) => write!(f, "{i}"), Token::JSONPointer(pointer) => write!(f, "${pointer}"), } @@ -132,11 +186,21 @@ pub enum LexingError { #[error("invalid JSON Pointer, found '{0}'. 
JSON Pointers must be empty or start with '/'.")] JSONPointerMissingInitialSlash(String), + + #[error("failed to parse date or datetime")] + InvalidDatetime(String, JiffError), + + #[error("failed to parse span")] + InvalidSpan(String, JiffError), } #[cfg(test)] mod tests { use crate::policy_exprs::{token::Token, Error::Lex, LexingError, Result, F64}; + use jiff::{ + tz::{self, TimeZone}, + Span, Timestamp, Zoned, + }; use logos::Logos as _; use test_log::test; @@ -230,6 +294,57 @@ mod tests { Token::JSONPointer(String::from("/data/one")), Token::CloseParen, ]; + + let tokens = lex(raw_program).unwrap(); + assert_eq!(tokens, expected); + } + + #[test] + fn basic_lexing_with_time() { + let raw_program = "(eq (sub 2024-09-17T09:00-05 2024-09-17T10:30-05) PT1H30M)"; + + let ts1: Timestamp = "2024-09-17T09:00-05".parse().unwrap(); + let dt1 = Zoned::new(ts1, TimeZone::UTC); + let ts2: Timestamp = "2024-09-17T10:30-05".parse().unwrap(); + let dt2 = Zoned::new(ts2, TimeZone::UTC); + let span: Span = "PT1H30M".parse().unwrap(); + + let expected = vec![ + Token::OpenParen, + Token::Ident(String::from("eq")), + Token::OpenParen, + Token::Ident(String::from("sub")), + Token::DateTime(Box::new(dt1)), + Token::DateTime(Box::new(dt2)), + Token::CloseParen, + Token::Span(Box::new(span)), + Token::CloseParen, + ]; + + let tokens = lex(raw_program).unwrap(); + assert_eq!(tokens, expected); + } + + // Ensure that idents with capital P are prioritized over being treated as spans + #[test] + fn regression_lex_span_and_ident() { + let raw_program = "Philip"; + let expected = vec![Token::Ident(String::from("Philip"))]; + + let tokens = lex(raw_program).unwrap(); + assert_eq!(tokens, expected); + + let raw_program = "PT1H30M"; + let span: Span = raw_program.parse().unwrap(); + + let expected = vec![Token::Span(Box::new(span))]; + + let tokens = lex(raw_program).unwrap(); + assert_eq!(tokens, expected); + + let raw_program = "PTBarnum"; + let expected = 
vec![Token::Ident(String::from("PTBarnum"))]; + let tokens = lex(raw_program).unwrap(); assert_eq!(tokens, expected); }