// Patch summary: records why parse_number_string rejected its input (new
// parse_error enum, new parsed_number_string_t::error field, new
// report_parse_error helper) in include/fast_float/ascii_number.h, and extends
// tests/json_fmt.cpp to check the reported reason and position for rejected
// JSON inputs.
// NOTE(review): line breaks and indentation below were re-laid-out from a
// whitespace-collapsed copy of this patch. Only this preamble and trailing
// comments on '+' lines are new, so the hunk headers are unchanged.
// NOTE(review): template argument lists look stripped (bare `template`, `span`,
// `std::vector`) - confirm against the original commit before applying.
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 4abe84b..b9b7710 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -234,6 +234,25 @@ void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t
   }
 }
 
+enum class parse_error { // reason stored in parsed_number_string_t::error
+  no_error, // initial value of parsed_number_string_t::error
+  // [JSON-only] The minus sign must be followed by an integer.
+  missing_integer_after_sign,
+  // A sign must be followed by an integer or dot.
+  missing_integer_or_dot_after_sign,
+  // [JSON-only] The integer part must not have leading zeros.
+  leading_zeros_in_integer_part,
+  // [JSON-only] The integer part must have at least one digit.
+  no_digits_in_integer_part,
+  // [JSON-only] If there is a decimal point, there must be digits in the
+  // fractional part.
+  no_digits_in_fractional_part,
+  // The mantissa must have at least one digit.
+  no_digits_in_mantissa,
+  // Scientific notation requires an exponential part.
+  missing_exponential_part,
+};
+
 template
 struct parsed_number_string_t {
   int64_t exponent{0};
@@ -245,11 +264,22 @@ struct parsed_number_string_t {
   // contains the range of the significant digits
   span integer{}; // non-nullable
   span fraction{}; // nullable
+  parse_error error{parse_error::no_error}; // overwritten by report_parse_error on failure
 };
 
 using byte_span = span;
 using parsed_number_string = parsed_number_string_t;
 
+template
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t
+report_parse_error(UC const* p, parse_error error) { // builds the result returned on a rejected input
+  parsed_number_string_t answer;
+  answer.valid = false;
+  answer.lastmatch = p; // position the error refers to; the JSON test reads it as an offset
+  answer.error = error;
+  return answer;
+}
+
 // Assuming that you use no more than 19 digits, this will
 // parse an ASCII string.
 template
@@ -269,15 +299,16 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par
 #endif
     ++p;
     if (p == pend) {
-      return answer;
+      return report_parse_error( // input ended right after the sign
+          p, parse_error::missing_integer_or_dot_after_sign);
     }
     if (fmt & FASTFLOAT_JSONFMT) {
       if (!is_integer(*p)) { // a sign must be followed by an integer
-        return answer;
+        return report_parse_error(p, parse_error::missing_integer_after_sign);
       }
     } else {
       if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
-        return answer;
+        return report_parse_error(p, parse_error::missing_integer_or_dot_after_sign);
       }
     }
   }
@@ -297,8 +328,12 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par
   answer.integer = span(start_digits, size_t(digit_count));
   if (fmt & FASTFLOAT_JSONFMT) {
     // at least 1 digit in integer part, without leading zeros
-    if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) {
-      return answer;
+    if (digit_count == 0) {
+      return report_parse_error(p, parse_error::no_digits_in_integer_part);
+    }
+    if ((start_digits[0] == UC('0') && digit_count > 1)) {
+      return report_parse_error(start_digits, // reported at the first digit, not at p
+                                parse_error::leading_zeros_in_integer_part);
     }
   }
 
@@ -323,11 +358,10 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par
   if (fmt & FASTFLOAT_JSONFMT) {
     // at least 1 digit in fractional part
     if (has_decimal_point && exponent == 0) {
-      return answer;
+      return report_parse_error(p, parse_error::no_digits_in_fractional_part);
     }
-  }
-  else if (digit_count == 0) { // we must have encountered at least one integer!
-    return answer;
+  } else if (digit_count == 0) { // we must have encountered at least one integer!
+    return report_parse_error(p, parse_error::no_digits_in_mantissa);
   }
   int64_t exp_number = 0; // explicit exponential part
   if ( ((fmt & chars_format::scientific) &&
@@ -350,8 +384,10 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par
     }
     if ((p == pend) || !is_integer(*p)) {
       if(!(fmt & chars_format::fixed)) {
-        // We are in error.
-        return answer;
+        // The exponential part is invalid for scientific notation, so it must
+        // be a trailing token for fixed notation. However, fixed notation is
+        // disabled, so report a scientific notation error.
+        return report_parse_error(p, parse_error::missing_exponential_part);
       }
       // Otherwise, we will be ignoring the 'e'.
       p = location_of_e;
@@ -368,7 +404,9 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par
       }
     }
   } else {
     // If it scientific and not fixed, we have to bail out.
-    if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
+    if ((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) {
+      return report_parse_error(p, parse_error::missing_exponential_part);
+    }
   }
   answer.lastmatch = p;
   answer.valid = true;
diff --git a/tests/json_fmt.cpp b/tests/json_fmt.cpp
index a73b4b2..c4ff52a 100644
--- a/tests/json_fmt.cpp
+++ b/tests/json_fmt.cpp
@@ -45,6 +45,15 @@ struct AcceptedValue {
   ExpectedResult expected;
 };
 
+struct RejectReason {
+  fast_float::parse_error error; // expected answer.error
+  intptr_t location_offset; // expected answer.lastmatch - input.data()
+};
+struct RejectedValue {
+  std::string input;
+  RejectReason reason;
+};
+
 int main() {
   const std::vector accept{
       {"-0.2", {-0.2, ""}},
@@ -55,8 +64,18 @@ int main() {
       {"1e", {1., "e"}},
       {"1e+", {1., "e+"}},
       {"inf", {std::numeric_limits::infinity(), ""}}};
-  const std::vector reject{"-.2", "00.02", "0.e+1", "00.e+1",
-                           ".25", "+0.25", "inf", "nan(snan)"};
+  const std::vector reject{
+      {"-.2", {fast_float::parse_error::missing_integer_after_sign, 1}},
+      {"00.02", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
+      {"0.e+1", {fast_float::parse_error::no_digits_in_fractional_part, 2}},
+      {"00.e+1", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
+      {".25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
+      // The following cases already start as invalid JSON, so they are
+      // handled as trailing junk and the error is for not having digits in the
+      // empty string before the invalid token.
+      {"+0.25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
+      {"inf", {fast_float::parse_error::no_digits_in_integer_part, 0}},
+      {"nan(snan)", {fast_float::parse_error::no_digits_in_integer_part, 0}}};
 
   for (std::size_t i = 0; i < accept.size(); ++i)
   {
@@ -80,7 +99,7 @@ int main() {
 
   for (std::size_t i = 0; i < reject.size(); ++i)
   {
-    const auto& s = reject[i];
+    const auto& s = reject[i].input;
     double result;
     auto answer = fast_float::from_chars(s.data(), s.data() + s.size(), result, fast_float::chars_format::json);
     if (answer.ec == std::errc()) {
@@ -89,6 +108,31 @@ int main() {
     }
   }
 
+  for (std::size_t i = 0; i < reject.size(); ++i) // second pass: check the reported reason and position
+  {
+    const auto& f = reject[i].input;
+    const auto& expected_reason = reject[i].reason;
+    auto answer = fast_float::parse_number_string(
+        f.data(), f.data() + f.size(),
+        fast_float::parse_options(fast_float::chars_format::json));
+    if (answer.valid) {
+      std::cerr << "json parse accepted invalid json " << f << std::endl;
+      return EXIT_FAILURE;
+    }
+    if (answer.error != expected_reason.error) {
+      std::cerr << "json parse failure had invalid error reason " << f
+                << std::endl;
+      return EXIT_FAILURE;
+    }
+    intptr_t error_location = answer.lastmatch - f.data(); // offset of the reported position within the input
+    if (error_location != expected_reason.location_offset) {
+      std::cerr << "json parse failure had invalid error location " << f
+                << " (expected " << expected_reason.location_offset << " got "
+                << error_location << ")" << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
   if(main_readme() != EXIT_SUCCESS) { return EXIT_FAILURE; }
   if(main_readme2() != EXIT_SUCCESS) { return EXIT_FAILURE; }