Add (minimal) support for libarrow 18.0.0

This is essentially to fix compiler warnings about missing cases in switch() statements and to fix a deprecation warning. Support for DECIMAL32 and DECIMAL64 should work on the read side, but has not actually been tested. The generic code in ogrlayerarrow.cpp isn't ready for them yet. No support is added on the write side, to limit backward-compatibility issues.
OSGeo · Oct 28, 2024 · 8dc1c70 · 8dc1c70
1 parent 72c88bd
commit 8dc1c70
Show file tree

Hide file tree

Showing 3 changed files with 155 additions and 2 deletions.
diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp
@@ -228,6 +228,10 @@ inline bool OGRArrowLayer::IsHandledListOrMapType(
            itemTypeId == arrow::Type::HALF_FLOAT ||
            itemTypeId == arrow::Type::FLOAT ||
            itemTypeId == arrow::Type::DOUBLE ||
+#if ARROW_VERSION_MAJOR >= 18
+           itemTypeId == arrow::Type::DECIMAL32 ||
+           itemTypeId == arrow::Type::DECIMAL64 ||
+#endif
            itemTypeId == arrow::Type::DECIMAL128 ||
            itemTypeId == arrow::Type::DECIMAL256 ||
            itemTypeId == arrow::Type::STRING ||
@@ -422,6 +426,10 @@ inline bool OGRArrowLayer::MapArrowTypeToOGR(
                                    // nanosecond accuracy
             break;
 
+#if ARROW_VERSION_MAJOR >= 18
+        case arrow::Type::DECIMAL32:
+        case arrow::Type::DECIMAL64:
+#endif
         case arrow::Type::DECIMAL128:
         case arrow::Type::DECIMAL256:
         {
@@ -468,6 +476,10 @@ inline bool OGRArrowLayer::MapArrowTypeToOGR(
                     eSubType = OFSTFloat32;
                     break;
                 case arrow::Type::DOUBLE:
+#if ARROW_VERSION_MAJOR >= 18
+                case arrow::Type::DECIMAL32:
+                case arrow::Type::DECIMAL64:
+#endif
                 case arrow::Type::DECIMAL128:
                 case arrow::Type::DECIMAL256:
                     eType = OFTRealList;
@@ -1290,6 +1302,23 @@ static void AddToArray(CPLJSONArray &oArray, const arrow::Array *array,
                 static_cast<const arrow::DoubleArray *>(array)->Value(nIdx));
             break;
         }
+
+#if ARROW_VERSION_MAJOR >= 18
+        case arrow::Type::DECIMAL32:
+        {
+            oArray.Add(CPLAtof(static_cast<const arrow::Decimal32Array *>(array)
+                                   ->FormatValue(nIdx)
+                                   .c_str()));
+            break;
+        }
+        case arrow::Type::DECIMAL64:
+        {
+            oArray.Add(CPLAtof(static_cast<const arrow::Decimal64Array *>(array)
+                                   ->FormatValue(nIdx)
+                                   .c_str()));
+            break;
+        }
+#endif
         case arrow::Type::DECIMAL128:
         {
             oArray.Add(
@@ -1470,6 +1499,25 @@ static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
                 static_cast<const arrow::DoubleArray *>(array)->Value(nIdx));
             break;
         }
+
+#if ARROW_VERSION_MAJOR >= 18
+        case arrow::Type::DECIMAL32:
+        {
+            oDict.Add(osKey,
+                      CPLAtof(static_cast<const arrow::Decimal32Array *>(array)
+                                  ->FormatValue(nIdx)
+                                  .c_str()));
+            break;
+        }
+        case arrow::Type::DECIMAL64:
+        {
+            oDict.Add(osKey,
+                      CPLAtof(static_cast<const arrow::Decimal64Array *>(array)
+                                  ->FormatValue(nIdx)
+                                  .c_str()));
+            break;
+        }
+#endif
         case arrow::Type::DECIMAL128:
         {
             oDict.Add(osKey,
@@ -1757,6 +1805,48 @@ static void ReadList(OGRFeature *poFeature, int i, int64_t nIdxInArray,
             break;
         }
 
+#if ARROW_VERSION_MAJOR >= 18
+        case arrow::Type::DECIMAL32:
+        {
+            const auto values = std::static_pointer_cast<arrow::Decimal32Array>(
+                array->values());
+            const auto nIdxStart = array->value_offset(nIdxInArray);
+            const int nCount = array->value_length(nIdxInArray);
+            std::vector<double> aValues;
+            aValues.reserve(nCount);
+            for (int k = 0; k < nCount; k++)
+            {
+                if (values->IsNull(nIdxStart + k))
+                    aValues.push_back(std::numeric_limits<double>::quiet_NaN());
+                else
+                    aValues.push_back(
+                        CPLAtof(values->FormatValue(nIdxStart + k).c_str()));
+            }
+            poFeature->SetField(i, nCount, aValues.data());
+            break;
+        }
+
+        case arrow::Type::DECIMAL64:
+        {
+            const auto values = std::static_pointer_cast<arrow::Decimal64Array>(
+                array->values());
+            const auto nIdxStart = array->value_offset(nIdxInArray);
+            const int nCount = array->value_length(nIdxInArray);
+            std::vector<double> aValues;
+            aValues.reserve(nCount);
+            for (int k = 0; k < nCount; k++)
+            {
+                if (values->IsNull(nIdxStart + k))
+                    aValues.push_back(std::numeric_limits<double>::quiet_NaN());
+                else
+                    aValues.push_back(
+                        CPLAtof(values->FormatValue(nIdxStart + k).c_str()));
+            }
+            poFeature->SetField(i, nCount, aValues.data());
+            break;
+        }
+#endif
+
         case arrow::Type::DECIMAL128:
         {
             const auto values =
@@ -2406,6 +2496,26 @@ inline OGRFeature *OGRArrowLayer::ReadFeature(
                 break;
             }
 
+#if ARROW_VERSION_MAJOR >= 18
+            case arrow::Type::DECIMAL32:
+            {
+                const auto castArray =
+                    static_cast<const arrow::Decimal32Array *>(array);
+                poFeature->SetField(
+                    i, CPLAtof(castArray->FormatValue(nIdxInBatch).c_str()));
+                break;
+            }
+
+            case arrow::Type::DECIMAL64:
+            {
+                const auto castArray =
+                    static_cast<const arrow::Decimal64Array *>(array);
+                poFeature->SetField(
+                    i, CPLAtof(castArray->FormatValue(nIdxInBatch).c_str()));
+                break;
+            }
+#endif
+
             case arrow::Type::DECIMAL128:
             {
                 const auto castArray =
@@ -3900,6 +4010,34 @@ inline bool OGRArrowLayer::SkipToNextFeatureDueToAttributeFilter() const
                 break;
             }
 
+#if ARROW_VERSION_MAJOR >= 18
+            case arrow::Type::DECIMAL32:
+            {
+                const auto castArray =
+                    static_cast<const arrow::Decimal32Array *>(array);
+                if (!ConstraintEvaluator(
+                        constraint,
+                        CPLAtof(castArray->FormatValue(m_nIdxInBatch).c_str())))
+                {
+                    return true;
+                }
+                break;
+            }
+
+            case arrow::Type::DECIMAL64:
+            {
+                const auto castArray =
+                    static_cast<const arrow::Decimal64Array *>(array);
+                if (!ConstraintEvaluator(
+                        constraint,
+                        CPLAtof(castArray->FormatValue(m_nIdxInBatch).c_str())))
+                {
+                    return true;
+                }
+                break;
+            }
+#endif
+
             case arrow::Type::DECIMAL128:
             {
                 const auto castArray =

diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp
@@ -184,7 +184,19 @@ inline void OGRArrowWriterLayer::CreateSchemaCommon()
             {
                 const int nPrecision = poFieldDefn->GetPrecision();
                 if (nWidth != 0 && nPrecision != 0)
-                    dt = arrow::decimal(nWidth, nPrecision);
+                {
+                    // Since arrow 18.0, we could use arrow::smallest_decimal()
+                    // to return the smallest representation (i.e. possibly
+                    // decimal32 and decimal64). But for now keep decimal128
+                    // as the minimum for backwards compatibility.
+                    // GetValueDecimal() and other functions in
+                    // ogrlayerarrow.cpp would have to be adapted for decimal32
+                    // and decimal64 compatibility.
+                    if (nWidth > 38)
+                        dt = arrow::decimal256(nWidth, nPrecision);
+                    else
+                        dt = arrow::decimal128(nWidth, nPrecision);
+                }
                 else if (eSubDT == OFSTFloat32)
                     dt = arrow::float32();
                 else

diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp
@@ -1427,6 +1427,10 @@ bool OGRParquetLayer::ReadNextBatch()
 {
     m_nIdxInBatch = 0;
 
+    const int nNumGroups = m_poArrowReader->num_row_groups();
+    if (nNumGroups == 0)
+        return false;
+
     if (m_bSingleBatch)
     {
         CPLAssert(m_iRecordBatch == 0);
@@ -1468,7 +1472,6 @@ bool OGRParquetLayer::ReadNextBatch()
         }
         else
         {
-            const int nNumGroups = m_poArrowReader->num_row_groups();
             OGRField sMin;
             OGRField sMax;
             OGR_RawField_SetNull(&sMin);