From 1e7927e006a94399e65974f0091e7cfc84dee149 Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Sat, 15 Jun 2024 15:34:25 +0800 Subject: [PATCH 1/2] feat: support nan/inf/-inf as float/double default --- lang/rust/avro/src/types.rs | 19 +++++++++++++++++++ lang/rust/avro/tests/io.rs | 29 ++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs index 9b88d8c8bfa..d4bf8cddcfc 100644 --- a/lang/rust/avro/src/types.rs +++ b/lang/rust/avro/src/types.rs @@ -860,6 +860,10 @@ impl Value { Value::Long(n) => Ok(Value::Float(n as f32)), Value::Float(x) => Ok(Value::Float(x)), Value::Double(x) => Ok(Value::Float(x as f32)), + Value::String(x) => match Self::parse_special_float(&x) { + Some(f) => Ok(Value::Float(f)), + None => Err(Error::GetFloat(ValueKind::String)), + }, other => Err(Error::GetFloat(other.into())), } } @@ -870,10 +874,25 @@ impl Value { Value::Long(n) => Ok(Value::Double(n as f64)), Value::Float(x) => Ok(Value::Double(f64::from(x))), Value::Double(x) => Ok(Value::Double(x)), + Value::String(x) => match Self::parse_special_float(&x) { + Some(f) => Ok(Value::Double(f.into())), + None => Err(Error::GetDouble(ValueKind::String)), + }, other => Err(Error::GetDouble(other.into())), } } + /// IEEE 754 NaN and infinities are not valid JSON numbers. + /// So they are represented in JSON as strings. + fn parse_special_float(s: &str) -> Option<f32> { + match s.trim().to_ascii_lowercase().as_str() { + "nan" | "+nan" | "-nan" => Some(f32::NAN), + "inf" | "+inf" | "infinity" | "+infinity" => Some(f32::INFINITY), + "-inf" | "-infinity" => Some(f32::NEG_INFINITY), + _ => None, + } + } + /// fn encode_iso_8859_1(s: &str) -> Result, Error> { s.chars() diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs index 44dfc192aa0..78e9b0200dd 100644 --- a/lang/rust/avro/tests/io.rs +++ b/lang/rust/avro/tests/io.rs @@ -62,6 +62,9 @@ lazy_static! 
{ (r#""long""#, "5", Value::Long(5)), (r#""float""#, "1.1", Value::Float(1.1)), (r#""double""#, "1.1", Value::Double(1.1)), + (r#""float""#, r#"" +inf ""#, Value::Float(f32::INFINITY)), + (r#""double""#, r#""-Infinity""#, Value::Double(f64::NEG_INFINITY)), + (r#""double""#, r#""-NAN""#, Value::Double(f64::NAN)), (r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""a""#, Value::Fixed(1, vec![97])), // ASCII 'a' => one byte (r#"{"type": "fixed", "name": "F", "size": 2}"#, r#""\u00FF""#, Value::Fixed(1, vec![255])), // The value is between U+0080 and U+07FF => ISO-8859-1 (r#"{"type": "enum", "name": "F", "symbols": ["FOO", "BAR"]}"#, r#""FOO""#, Value::Enum(0, "FOO".to_string())), @@ -264,11 +267,27 @@ fn test_default_value() -> TestResult { &mut Cursor::new(encoded), Some(&reader_schema), )?; - assert_eq!( - datum_read, datum_to_read, - "{} -> {}", - *field_type, *default_json - ); + match default_datum { + Value::Double(f) if f.is_nan() => { + let Value::Record(fields) = datum_read else { + panic!() + }; + let Value::Double(f) = fields[0].1 else { + panic!("double expected") + }; + assert!( + f.is_nan(), + "{field_type} -> {default_json} is parsed as {f} rather than NaN" + ); + } + _ => { + assert_eq!( + datum_read, datum_to_read, + "{} -> {}", + *field_type, *default_json + ); + } + } } Ok(()) From c38717455f8d9cb5facd83da3708d0840658c887 Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Tue, 18 Jun 2024 13:59:22 +0800 Subject: [PATCH 2/2] update test unreachable message --- lang/rust/avro/tests/io.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs index 78e9b0200dd..e102e1f662c 100644 --- a/lang/rust/avro/tests/io.rs +++ b/lang/rust/avro/tests/io.rs @@ -270,7 +270,7 @@ fn test_default_value() -> TestResult { match default_datum { Value::Double(f) if f.is_nan() => { let Value::Record(fields) = datum_read else { - panic!() + unreachable!("the test always constructs top level as record") }; let 
Value::Double(f) = fields[0].1 else { panic!("double expected")