Add (minimal) support for libarrow 18.0.0

rouault · rouault · commit 59916e217940 · 2024-10-29T14:47:49.000+01:00
This is essentially to fix compiler warnings about missing cases in switch()
statements and to fix a deprecation warning.

Support for DECIMAL32 and DECIMAL64 should work on the read side, but it has
not actually been tested. The ogrlayerarrow.cpp generic code isn't ready for
them yet.
No support is added on the write side, to limit backward-compatibility issues.
diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp
@@ -244,6 +244,10 @@ inline bool OGRArrowLayer::IsHandledListOrMapType(
            itemTypeId == arrow::Type::HALF_FLOAT ||
            itemTypeId == arrow::Type::FLOAT ||
            itemTypeId == arrow::Type::DOUBLE ||
+#if ARROW_VERSION_MAJOR >= 18
+           itemTypeId == arrow::Type::DECIMAL32 ||
+           itemTypeId == arrow::Type::DECIMAL64 ||
+#endif
            itemTypeId == arrow::Type::DECIMAL128 ||
            itemTypeId == arrow::Type::DECIMAL256 ||
            itemTypeId == arrow::Type::STRING ||
@@ -427,6 +431,10 @@ inline bool OGRArrowLayer::MapArrowTypeToOGR(
                                    // nanosecond accuracy
             break;
 
+#if ARROW_VERSION_MAJOR >= 18
+        case arrow::Type::DECIMAL32:
+        case arrow::Type::DECIMAL64:
+#endif
         case arrow::Type::DECIMAL128:
         case arrow::Type::DECIMAL256:
         {
@@ -473,6 +481,10 @@ inline bool OGRArrowLayer::MapArrowTypeToOGR(
                     eSubType = OFSTFloat32;
                     break;
                 case arrow::Type::DOUBLE:
+#if ARROW_VERSION_MAJOR >= 18
+                case arrow::Type::DECIMAL32:
+                case arrow::Type::DECIMAL64:
+#endif
                 case arrow::Type::DECIMAL128:
                 case arrow::Type::DECIMAL256:
                     eType = OFTRealList;
@@ -1279,6 +1291,23 @@ static void AddToArray(CPLJSONArray &oArray, const arrow::Array *array,
                 static_cast<const arrow::DoubleArray *>(array)->Value(nIdx));
             break;
         }
+
+#if ARROW_VERSION_MAJOR >= 18
+        case arrow::Type::DECIMAL32:
+        {
+            oArray.Add(CPLAtof(static_cast<const arrow::Decimal32Array *>(array)
+                                   ->FormatValue(nIdx)
+                                   .c_str()));
+            break;
+        }
+        case arrow::Type::DECIMAL64:
+        {
+            oArray.Add(CPLAtof(static_cast<const arrow::Decimal64Array *>(array)
+                                   ->FormatValue(nIdx)
+                                   .c_str()));
+            break;
+        }
+#endif
         case arrow::Type::DECIMAL128:
         {
             oArray.Add(
@@ -1449,6 +1478,25 @@ static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
                 static_cast<const arrow::DoubleArray *>(array)->Value(nIdx));
             break;
         }
+
+#if ARROW_VERSION_MAJOR >= 18
+        case arrow::Type::DECIMAL32:
+        {
+            oDict.Add(osKey,
+                      CPLAtof(static_cast<const arrow::Decimal32Array *>(array)
+                                  ->FormatValue(nIdx)
+                                  .c_str()));
+            break;
+        }
+        case arrow::Type::DECIMAL64:
+        {
+            oDict.Add(osKey,
+                      CPLAtof(static_cast<const arrow::Decimal64Array *>(array)
+                                  ->FormatValue(nIdx)
+                                  .c_str()));
+            break;
+        }
+#endif
         case arrow::Type::DECIMAL128:
         {
             oDict.Add(osKey,
@@ -1710,6 +1758,48 @@ static void ReadList(OGRFeature *poFeature, int i, int64_t nIdxInArray,
             break;
         }
 
+#if ARROW_VERSION_MAJOR >= 18
+        case arrow::Type::DECIMAL32:
+        {
+            const auto values = std::static_pointer_cast<arrow::Decimal32Array>(
+                array->values());
+            const auto nIdxStart = array->value_offset(nIdxInArray);
+            const int nCount = array->value_length(nIdxInArray);
+            std::vector<double> aValues;
+            aValues.reserve(nCount);
+            for (int k = 0; k < nCount; k++)
+            {
+                if (values->IsNull(nIdxStart + k))
+                    aValues.push_back(std::numeric_limits<double>::quiet_NaN());
+                else
+                    aValues.push_back(
+                        CPLAtof(values->FormatValue(nIdxStart + k).c_str()));
+            }
+            poFeature->SetField(i, nCount, aValues.data());
+            break;
+        }
+
+        case arrow::Type::DECIMAL64:
+        {
+            const auto values = std::static_pointer_cast<arrow::Decimal64Array>(
+                array->values());
+            const auto nIdxStart = array->value_offset(nIdxInArray);
+            const int nCount = array->value_length(nIdxInArray);
+            std::vector<double> aValues;
+            aValues.reserve(nCount);
+            for (int k = 0; k < nCount; k++)
+            {
+                if (values->IsNull(nIdxStart + k))
+                    aValues.push_back(std::numeric_limits<double>::quiet_NaN());
+                else
+                    aValues.push_back(
+                        CPLAtof(values->FormatValue(nIdxStart + k).c_str()));
+            }
+            poFeature->SetField(i, nCount, aValues.data());
+            break;
+        }
+#endif
+
         case arrow::Type::DECIMAL128:
         {
             const auto values =
@@ -2313,6 +2403,26 @@ inline OGRFeature *OGRArrowLayer::ReadFeature(
                 break;
             }
 
+#if ARROW_VERSION_MAJOR >= 18
+            case arrow::Type::DECIMAL32:
+            {
+                const auto castArray =
+                    static_cast<const arrow::Decimal32Array *>(array);
+                poFeature->SetField(
+                    i, CPLAtof(castArray->FormatValue(nIdxInBatch).c_str()));
+                break;
+            }
+
+            case arrow::Type::DECIMAL64:
+            {
+                const auto castArray =
+                    static_cast<const arrow::Decimal64Array *>(array);
+                poFeature->SetField(
+                    i, CPLAtof(castArray->FormatValue(nIdxInBatch).c_str()));
+                break;
+            }
+#endif
+
             case arrow::Type::DECIMAL128:
             {
                 const auto castArray =
@@ -3803,6 +3913,34 @@ inline bool OGRArrowLayer::SkipToNextFeatureDueToAttributeFilter() const
                 break;
             }
 
+#if ARROW_VERSION_MAJOR >= 18
+            case arrow::Type::DECIMAL32:
+            {
+                const auto castArray =
+                    static_cast<const arrow::Decimal32Array *>(array);
+                if (!ConstraintEvaluator(
+                        constraint,
+                        CPLAtof(castArray->FormatValue(m_nIdxInBatch).c_str())))
+                {
+                    return true;
+                }
+                break;
+            }
+
+            case arrow::Type::DECIMAL64:
+            {
+                const auto castArray =
+                    static_cast<const arrow::Decimal64Array *>(array);
+                if (!ConstraintEvaluator(
+                        constraint,
+                        CPLAtof(castArray->FormatValue(m_nIdxInBatch).c_str())))
+                {
+                    return true;
+                }
+                break;
+            }
+#endif
+
             case arrow::Type::DECIMAL128:
             {
                 const auto castArray =
diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowwriterlayer.hpp
@@ -199,7 +199,19 @@ inline void OGRArrowWriterLayer::CreateSchemaCommon()
             {
                 const int nPrecision = poFieldDefn->GetPrecision();
                 if (nWidth != 0 && nPrecision != 0)
-                    dt = arrow::decimal(nWidth, nPrecision);
+                {
+                    // Since arrow 18.0, we could use arrow::smallest_decimal()
+                    // to return the smallest representation (i.e. possibly
+                    // decimal32 and decimal64). But for now keep decimal128
+                    // as the minimum for backwards compatibility.
+                    // GetValueDecimal() and other functions in
+                    // ogrlayerarrow.cpp would have to be adapted for decimal32
+                    // and decimal64 compatibility.
+                    if (nWidth > 38)
+                        dt = arrow::decimal256(nWidth, nPrecision);
+                    else
+                        dt = arrow::decimal128(nWidth, nPrecision);
+                }
                 else if (eSubDT == OFSTFloat32)
                     dt = arrow::float32();
                 else
diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp
@@ -1404,6 +1404,10 @@ bool OGRParquetLayer::ReadNextBatch()
 {
     m_nIdxInBatch = 0;
 
+    const int nNumGroups = m_poArrowReader->num_row_groups();
+    if (nNumGroups == 0)
+        return false;
+
     if (m_bSingleBatch)
     {
         CPLAssert(m_iRecordBatch == 0);
@@ -1456,7 +1460,6 @@ bool OGRParquetLayer::ReadNextBatch()
         }
         else
         {
-            const int nNumGroups = m_poArrowReader->num_row_groups();
             OGRField sMin;
             OGRField sMax;
             OGR_RawField_SetNull(&sMin);

Original file line number	Diff line number	Diff line change
`@@ -1404,6 +1404,10 @@ bool OGRParquetLayer::ReadNextBatch()`
`1404`	`1404`	`{`
`1405`	`1405`	`m_nIdxInBatch = 0;`
`1406`	`1406`
	`1407`	`+ const int nNumGroups = m_poArrowReader->num_row_groups();`
	`1408`	`+ if (nNumGroups == 0)`
	`1409`	`+ return false;`
	`1410`	`+`
`1407`	`1411`	`if (m_bSingleBatch)`
`1408`	`1412`	`{`
`1409`	`1413`	`CPLAssert(m_iRecordBatch == 0);`
`@@ -1456,7 +1460,6 @@ bool OGRParquetLayer::ReadNextBatch()`
`1456`	`1460`	`}`
`1457`	`1461`	`else`
`1458`	`1462`	`{`
`1459`		`- const int nNumGroups = m_poArrowReader->num_row_groups();`
`1460`	`1463`	`OGRField sMin;`
`1461`	`1464`	`OGRField sMax;`
`1462`	`1465`	`OGR_RawField_SetNull(&sMin);`