diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index af0c8e2c9807..a52cf4e4227f 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -228,6 +228,10 @@ inline bool OGRArrowLayer::IsHandledListOrMapType( itemTypeId == arrow::Type::HALF_FLOAT || itemTypeId == arrow::Type::FLOAT || itemTypeId == arrow::Type::DOUBLE || +#if ARROW_VERSION_MAJOR >= 18 + itemTypeId == arrow::Type::DECIMAL32 || + itemTypeId == arrow::Type::DECIMAL64 || +#endif itemTypeId == arrow::Type::DECIMAL128 || itemTypeId == arrow::Type::DECIMAL256 || itemTypeId == arrow::Type::STRING || @@ -422,6 +426,10 @@ inline bool OGRArrowLayer::MapArrowTypeToOGR( // nanosecond accuracy break; +#if ARROW_VERSION_MAJOR >= 18 + case arrow::Type::DECIMAL32: + case arrow::Type::DECIMAL64: +#endif case arrow::Type::DECIMAL128: case arrow::Type::DECIMAL256: { @@ -468,6 +476,10 @@ inline bool OGRArrowLayer::MapArrowTypeToOGR( eSubType = OFSTFloat32; break; case arrow::Type::DOUBLE: +#if ARROW_VERSION_MAJOR >= 18 + case arrow::Type::DECIMAL32: + case arrow::Type::DECIMAL64: +#endif case arrow::Type::DECIMAL128: case arrow::Type::DECIMAL256: eType = OFTRealList; @@ -1290,6 +1302,23 @@ static void AddToArray(CPLJSONArray &oArray, const arrow::Array *array, static_cast(array)->Value(nIdx)); break; } + +#if ARROW_VERSION_MAJOR >= 18 + case arrow::Type::DECIMAL32: + { + oArray.Add(CPLAtof(static_cast(array) + ->FormatValue(nIdx) + .c_str())); + break; + } + case arrow::Type::DECIMAL64: + { + oArray.Add(CPLAtof(static_cast(array) + ->FormatValue(nIdx) + .c_str())); + break; + } +#endif case arrow::Type::DECIMAL128: { oArray.Add( @@ -1470,6 +1499,25 @@ static void AddToDict(CPLJSONObject &oDict, const std::string &osKey, static_cast(array)->Value(nIdx)); break; } + +#if ARROW_VERSION_MAJOR >= 18 + case arrow::Type::DECIMAL32: + { + oDict.Add(osKey, + CPLAtof(static_cast(array) + ->FormatValue(nIdx) + .c_str())); + break; + } + case arrow::Type::DECIMAL64: + { + oDict.Add(osKey, + CPLAtof(static_cast(array) + ->FormatValue(nIdx) + .c_str())); + break; + } +#endif case arrow::Type::DECIMAL128: { oDict.Add(osKey, @@ -1757,6 +1805,48 @@ static void ReadList(OGRFeature *poFeature, int i, int64_t nIdxInArray, break; } +#if ARROW_VERSION_MAJOR >= 18 + case arrow::Type::DECIMAL32: + { + const auto values = std::static_pointer_cast( + array->values()); + const auto nIdxStart = array->value_offset(nIdxInArray); + const int nCount = array->value_length(nIdxInArray); + std::vector aValues; + aValues.reserve(nCount); + for (int k = 0; k < nCount; k++) + { + if (values->IsNull(nIdxStart + k)) + aValues.push_back(std::numeric_limits::quiet_NaN()); + else + aValues.push_back( + CPLAtof(values->FormatValue(nIdxStart + k).c_str())); + } + poFeature->SetField(i, nCount, aValues.data()); + break; + } + + case arrow::Type::DECIMAL64: + { + const auto values = std::static_pointer_cast( + array->values()); + const auto nIdxStart = array->value_offset(nIdxInArray); + const int nCount = array->value_length(nIdxInArray); + std::vector aValues; + aValues.reserve(nCount); + for (int k = 0; k < nCount; k++) + { + if (values->IsNull(nIdxStart + k)) + aValues.push_back(std::numeric_limits::quiet_NaN()); + else + aValues.push_back( + CPLAtof(values->FormatValue(nIdxStart + k).c_str())); + } + poFeature->SetField(i, nCount, aValues.data()); + break; + } +#endif + case arrow::Type::DECIMAL128: { const auto values = @@ -2406,6 +2496,26 @@ inline OGRFeature *OGRArrowLayer::ReadFeature( break; } +#if ARROW_VERSION_MAJOR >= 18 + case arrow::Type::DECIMAL32: + { + const auto castArray = + static_cast(array); + poFeature->SetField( + i, CPLAtof(castArray->FormatValue(nIdxInBatch).c_str())); + break; + } + + case arrow::Type::DECIMAL64: + { + const auto castArray = + static_cast(array); + poFeature->SetField( + i, CPLAtof(castArray->FormatValue(nIdxInBatch).c_str())); + break; + } +#endif + case arrow::Type::DECIMAL128: { const auto castArray = @@ -3900,6 +4010,34 @@ inline bool OGRArrowLayer::SkipToNextFeatureDueToAttributeFilter() const break; } +#if ARROW_VERSION_MAJOR >= 18 + case arrow::Type::DECIMAL32: + { + const auto castArray = + static_cast(array); + if (!ConstraintEvaluator( + constraint, + CPLAtof(castArray->FormatValue(m_nIdxInBatch).c_str()))) + { + return true; + } + break; + } + + case arrow::Type::DECIMAL64: + { + const auto castArray = + static_cast(array); + if (!ConstraintEvaluator( + constraint, + CPLAtof(castArray->FormatValue(m_nIdxInBatch).c_str()))) + { + return true; + } + break; + } +#endif + case arrow::Type::DECIMAL128: { const auto castArray = diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp index 8f23b3c34cf6..1a8a2e7dcbc1 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp @@ -184,7 +184,19 @@ inline void OGRArrowWriterLayer::CreateSchemaCommon() { const int nPrecision = poFieldDefn->GetPrecision(); if (nWidth != 0 && nPrecision != 0) - dt = arrow::decimal(nWidth, nPrecision); + { + // Since arrow 18.0, we could use arrow::smallest_decimal() + // to return the smallest representation (i.e. possibly + // decimal32 and decimal64). But for now keep decimal128 + // as the minimum for backwards compatibility. + // GetValueDecimal() and other functions in + // ogrlayerarrow.cpp would have to be adapted for decimal32 + // and decimal64 compatibility. + if (nWidth > 38) + dt = arrow::decimal256(nWidth, nPrecision); + else + dt = arrow::decimal128(nWidth, nPrecision); + } else if (eSubDT == OFSTFloat32) dt = arrow::float32(); else diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp index fc8013301e33..e613c0f1d921 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp @@ -1427,6 +1427,10 @@ bool OGRParquetLayer::ReadNextBatch() { m_nIdxInBatch = 0; + const int nNumGroups = m_poArrowReader->num_row_groups(); + if (nNumGroups == 0) + return false; + if (m_bSingleBatch) { CPLAssert(m_iRecordBatch == 0); @@ -1468,7 +1472,6 @@ bool OGRParquetLayer::ReadNextBatch() } else { - const int nNumGroups = m_poArrowReader->num_row_groups(); OGRField sMin; OGRField sMax; OGR_RawField_SetNull(&sMin);