Skip to content

Commit

Permalink
Update README + unit tests RE: decimal parsing
Browse files Browse the repository at this point in the history
vincentlaucsb committed Jun 6, 2024
1 parent d76acaf commit 985966e
Showing 7 changed files with 123 additions and 45 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -205,6 +205,12 @@ for (auto& row: reader) {
std::cout << "Hex value is " << value << std::endl;
}

// Numbers written with a decimal separator other than '.' (e.g. ',') can be parsed this way
long double decimalValue;
if (row["decimalNumber"].try_parse_decimal(decimalValue, ',')) {
std::cout << "Decimal value is " << decimalValue << std::endl;
}

// ..
}
}
10 changes: 4 additions & 6 deletions include/internal/csv_row.cpp
Original file line number Diff line number Diff line change
@@ -164,18 +164,16 @@ namespace csv {
return true;
}

// try_parse_decimal uses the specified decimal symbol and
// also sets the private members _type and value
CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalsymbol) {
CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) {
// If field has already been parsed to empty, no need to do it again:
if (this->_type == DataType::CSV_NULL)
return false;

// Not yet parsed or possibly parsed with other decimalsymbol
// Not yet parsed or possibly parsed with other decimalSymbol
if (this->_type == DataType::UNKNOWN || this->_type == DataType::CSV_STRING || this->_type == DataType::CSV_DOUBLE)
this->_type = internals::data_type(this->sv, &this->value, decimalsymbol); // parse again
this->_type = internals::data_type(this->sv, &this->value, decimalSymbol); // parse again

// Integral types are not affected by decimalsymbol and need not be parsed again
// Integral types are not affected by decimalSymbol and need not be parsed again

// Either we already had an integral type before, or we just got any numeric type now.
if (this->_type >= DataType::CSV_INT8 && this->_type <= DataType::CSV_DOUBLE) {
10 changes: 6 additions & 4 deletions include/internal/csv_row.hpp
Original file line number Diff line number Diff line change
@@ -217,11 +217,13 @@ namespace csv {
/** Parse a hexadecimal value, returning false if the value is not hex. */
bool try_parse_hex(int& parsedValue);

/** Parse a value, returning false if the value is not decimal.
* If true it also sets the private members _type and value.
* Decimal symbol may be given explicitly, default is '.'.
/** Attempts to parse a decimal (or integer) value using the given symbol,
* returning `true` if the value is numeric.
*
* @note This method also updates this field's type
*
*/
bool try_parse_decimal(long double& dVal, const char decimalsymbol = '.');
bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.');

/** Compares the contents of this field to a numeric value. If this
* field does not contain a numeric value, then all comparisons return
14 changes: 7 additions & 7 deletions include/internal/data_type.hpp
Original file line number Diff line number Diff line change
@@ -235,11 +235,11 @@ namespace csv {
* @param[in] in String value to be examined
* @param[out] out Pointer to long double where results of numeric parsing
* get stored
* @param[in] decimalsymbol the character separating integral and decimal part,
* @param[in] decimalSymbol the character separating integral and decimal part,
* defaults to '.' if omitted
*/
CONSTEXPR_14
DataType data_type(csv::string_view in, long double* const out, const char decimalsymbol) {
DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) {
// Empty string --> NULL
if (in.size() == 0)
return DataType::CSV_NULL;
@@ -285,7 +285,7 @@ namespace csv {

is_negative = true;
break;
// case decimalsymbol: not allowed because decimalsymbol is not a literal,
// case decimalSymbol: not allowed because decimalSymbol is not a literal,
// it is handled in the default block
case 'e':
case 'E':
@@ -325,10 +325,10 @@ namespace csv {
else
integral_part = (integral_part * 10) + digit;
}
// case decimalymbol: not allowed because decimalsymbol is not a literal.
else if (dot_allowed && current == decimalsymbol) {
dot_allowed = false;
prob_float = true;
// case decimalSymbol: not allowed because decimalSymbol is not a literal.
else if (dot_allowed && current == decimalSymbol) {
dot_allowed = false;
prob_float = true;
}
else {
return DataType::CSV_STRING;
36 changes: 18 additions & 18 deletions single_include/csv.hpp
Original file line number Diff line number Diff line change
@@ -5295,11 +5295,11 @@ namespace csv {
* @param[in] in String value to be examined
* @param[out] out Pointer to long double where results of numeric parsing
* get stored
* @param[in] decimalsymbol the character separating integral and decimal part,
* @param[in] decimalSymbol the character separating integral and decimal part,
* defaults to '.' if omitted
*/
CONSTEXPR_14
DataType data_type(csv::string_view in, long double* const out, const char decimalsymbol) {
DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) {
// Empty string --> NULL
if (in.size() == 0)
return DataType::CSV_NULL;
@@ -5345,7 +5345,7 @@ namespace csv {

is_negative = true;
break;
// case decimalsymbol: not allowed because decimalsymbol is not a literal,
// case decimalSymbol: not allowed because decimalSymbol is not a literal,
// it is handled in the default block
case 'e':
case 'E':
@@ -5385,10 +5385,10 @@ namespace csv {
else
integral_part = (integral_part * 10) + digit;
}
// case decimalymbol: not allowed because decimalsymbol is not a literal.
else if (dot_allowed && current == decimalsymbol) {
dot_allowed = false;
prob_float = true;
// case decimalSymbol: not allowed because decimalSymbol is not a literal.
else if (dot_allowed && current == decimalSymbol) {
dot_allowed = false;
prob_float = true;
}
else {
return DataType::CSV_STRING;
@@ -5612,11 +5612,13 @@ namespace csv {
/** Parse a hexadecimal value, returning false if the value is not hex. */
bool try_parse_hex(int& parsedValue);

/** Parse a value, returning false if the value is not decimal.
* If true it also sets the private members _type and value.
* Decimal symbol may be given explicitly, default is '.'.
/** Attempts to parse a decimal (or integer) value using the given symbol,
* returning `true` if the value is numeric.
*
* @note This method also updates this field's type
*
*/
bool try_parse_decimal(long double& dVal, const char decimalsymbol = '.');
bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.');

/** Compares the contents of this field to a numeric value. If this
* field does not contain a numeric value, then all comparisons return
@@ -7855,18 +7857,16 @@ namespace csv {
return true;
}

// try_parse_decimal uses the specified decimal symbol and
// also sets the private members _type and value
CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalsymbol) {
CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) {
// If field has already been parsed to empty, no need to do it again:
if (this->_type == DataType::CSV_NULL)
return false;
return false;

// Not yet parsed or possibly parsed with other decimalsymbol
// Not yet parsed or possibly parsed with other decimalSymbol
if (this->_type == DataType::UNKNOWN || this->_type == DataType::CSV_STRING || this->_type == DataType::CSV_DOUBLE)
this->_type = internals::data_type(this->sv, &this->value, decimalsymbol); // parse again
this->_type = internals::data_type(this->sv, &this->value, decimalSymbol); // parse again

// Integral types are not affected by decimalsymbol and need not be parsed again
// Integral types are not affected by decimalSymbol and need not be parsed again

// Either we already had an integral type before, or we just got any numeric type now.
if (this->_type >= DataType::CSV_INT8 && this->_type <= DataType::CSV_DOUBLE) {
51 changes: 41 additions & 10 deletions single_include_test/csv.hpp
Original file line number Diff line number Diff line change
@@ -5151,7 +5151,8 @@ namespace csv {
template<> inline DataType type_num<std::nullptr_t>() { return DataType::CSV_NULL; }
template<> inline DataType type_num<std::string>() { return DataType::CSV_STRING; }

CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr);
// Forward declaration. `out` (optional) receives the result of numeric parsing;
// `decimalSymbol` is the character separating the integral and decimal parts.
CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr,
    const char decimalSymbol = '.');
#endif

/** Given a byte size, return the largest number than can be stored in
@@ -5294,9 +5295,11 @@ namespace csv {
* @param[in] in String value to be examined
* @param[out] out Pointer to long double where results of numeric parsing
* get stored
* @param[in] decimalSymbol the character separating integral and decimal part,
* defaults to '.' if omitted
*/
CONSTEXPR_14
DataType data_type(csv::string_view in, long double* const out) {
DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) {
// Empty string --> NULL
if (in.size() == 0)
return DataType::CSV_NULL;
@@ -5342,14 +5345,8 @@ namespace csv {

is_negative = true;
break;
case '.':
if (!dot_allowed) {
return DataType::CSV_STRING;
}

dot_allowed = false;
prob_float = true;
break;
// case decimalSymbol: not allowed because decimalSymbol is not a literal,
// it is handled in the default block
case 'e':
case 'E':
// Process scientific notation
@@ -5388,6 +5385,11 @@ namespace csv {
else
integral_part = (integral_part * 10) + digit;
}
// case decimalSymbol: not allowed because decimalSymbol is not a literal.
else if (dot_allowed && current == decimalSymbol) {
dot_allowed = false;
prob_float = true;
}
else {
return DataType::CSV_STRING;
}
@@ -5610,6 +5612,14 @@ namespace csv {
/** Parse a hexadecimal value, returning false if the value is not hex. */
bool try_parse_hex(int& parsedValue);

/** Attempts to parse a decimal (or integer) value using the given symbol,
 *  returning `true` if the value is numeric.
 *
 *  @param[out] dVal          Receives the parsed number. It is only assigned
 *                            when this method returns `true`.
 *  @param[in]  decimalSymbol Character separating the integral and decimal
 *                            parts; defaults to '.'.
 *
 *  @return `true` if the field holds an integral or floating point value,
 *          `false` if it is empty or a non-numeric string.
 *
 *  @note This method also updates this field's type and cached numeric value:
 *        fields that are unparsed, or were previously classified as a string
 *        or a double, are classified again using `decimalSymbol`.
 *
 */
bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.');

/** Compares the contents of this field to a numeric value. If this
* field does not contain a numeric value, then all comparisons return
* false.
@@ -7847,6 +7857,27 @@ namespace csv {
return true;
}

/**
 * Try to interpret this field as a number, treating `decimalSymbol` as the
 * separator between the integral and decimal parts.
 *
 * @param[out] dVal          Receives the parsed value; only written when the
 *                           function returns `true`.
 * @param[in]  decimalSymbol Character accepted as the decimal separator.
 *
 * @return `true` if the field's type ends up within [CSV_INT8, CSV_DOUBLE],
 *         `false` otherwise.
 *
 * @note May overwrite the cached `_type` and `value` members.
 */
CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) {
    // A field already classified as empty does not need to be examined again.
    if (this->_type == DataType::CSV_NULL) {
        return false;
    }

    // Unparsed fields have to be classified, and a string/double result may
    // stem from a different decimal symbol, so those are classified afresh.
    // Integral results do not depend on the decimal symbol and are kept.
    switch (this->_type) {
        case DataType::UNKNOWN:
        case DataType::CSV_STRING:
        case DataType::CSV_DOUBLE:
            this->_type = internals::data_type(this->sv, &this->value, decimalSymbol);
            break;
        default:
            break;
    }

    // Either the type was integral all along, or the classification above
    // just produced some numeric type. Anything else (CSV_NULL, CSV_STRING)
    // is not numeric.
    const bool isNumeric =
        this->_type >= DataType::CSV_INT8 && this->_type <= DataType::CSV_DOUBLE;
    if (!isNumeric) {
        return false;
    }

    dVal = this->value;
    return true;
}

#ifdef _MSC_VER
#pragma region CSVRow Iterator
#endif
41 changes: 41 additions & 0 deletions tests/test_csv_field.cpp
Original file line number Diff line number Diff line change
@@ -141,6 +141,47 @@ TEST_CASE("CSVField try_parse_hex()", "[test_csv_field_parse_hex]") {
}
}


// Exercises CSVField::try_parse_decimal() with ',' as the decimal symbol.
// Tagged separately from the try_parse_hex() tests so that Catch2 tag filters
// select only the decimal-parsing cases.
TEST_CASE("CSVField try_parse_decimal()", "[test_csv_field_parse_decimal]") {
    SECTION("Test try_parse_decimal() with non-numeric value") {
        long double output = 0;
        std::string input = "stroustrup";
        CSVField testField(input);

        // A plain word is not numeric, so the field is classified as a string.
        REQUIRE(testField.try_parse_decimal(output, ',') == false);
        REQUIRE(testField.type() == DataType::CSV_STRING);
    }

    SECTION("Test try_parse_decimal() with integer value") {
        long double output = 0;
        std::string input = "2024";
        CSVField testField(input);

        // Integers contain no decimal symbol and must still parse successfully.
        REQUIRE(testField.try_parse_decimal(output, ',') == true);
        REQUIRE(testField.type() == DataType::CSV_INT16);
        REQUIRE(internals::is_equal(output, 2024.0l));
    }

    SECTION("Test try_parse_decimal() with various valid values") {
        std::string input;
        long double output = 0;
        long double expected = 0;

        std::tie(input, expected) =
            GENERATE(table<std::string, long double>(
                csv_test::FLOAT_TEST_CASES));

        // Replace '.' with ',' so each input uses the decimal symbol under test
        std::replace(input.begin(), input.end(), '.', ',');

        CSVField testField(input);

        REQUIRE(testField.try_parse_decimal(output, ',') == true);
        REQUIRE(testField.type() == DataType::CSV_DOUBLE);
        REQUIRE(internals::is_equal(output, expected));
    }
}

TEMPLATE_TEST_CASE("CSVField get<>() - Disallow Float to Int", "[test_csv_field_get_float_as_int]",
unsigned char, unsigned short, unsigned int, unsigned long long int,
signed char, short, int, long long int) {

0 comments on commit 985966e

Please sign in to comment.