diff --git a/autotest/ogr/ogr_adbc.py b/autotest/ogr/ogr_adbc.py index 8ee1da12a50f..7a6f8b93c0e0 100755 --- a/autotest/ogr/ogr_adbc.py +++ b/autotest/ogr/ogr_adbc.py @@ -326,6 +326,36 @@ def test_ogr_adbc_test_ogrsf_parquet_filename_with_glob(): assert "ERROR" not in ret +############################################################################### +# Test DATETIME_AS_STRING=YES GetArrowStream() option + + +def test_ogr_adbc_arrow_stream_numpy_datetime_as_string(tmp_vsimem): + pytest.importorskip("osgeo.gdal_array") + pytest.importorskip("numpy") + + if not _has_libduckdb(): + pytest.skip("libduckdb.so missing") + + with gdal.OpenEx( + "data/parquet/test.parquet", gdal.OF_VECTOR, allowed_drivers=["ADBC"] + ) as ds: + lyr = ds.GetLayer(0) + stream = lyr.GetArrowStreamAsNumPy( + options=["USE_MASKED_ARRAYS=NO", "DATETIME_AS_STRING=YES"] + ) + batches = [batch for batch in stream] + batch = batches[0] + # Should be "2019-01-01T14:00:00.500-02:15" but DuckDB returns in UTC + # On my machine, for some reason it returns without the Z, whereas on + # the ubuntu_2404 it returns with the Z... despite both using libduckdb 1.1.3 + # at time of writing... 
+ assert batch["timestamp_ms_gmt_minus_0215"][0] in ( + b"2019-01-01T16:15:00.500", + b"2019-01-01T16:15:00.500Z", + ) + + ############################################################################### # Run test_ogrsf on a DuckDB dataset diff --git a/autotest/ogr/ogr_flatgeobuf.py b/autotest/ogr/ogr_flatgeobuf.py index bdbd0819e106..6e68443010ea 100644 --- a/autotest/ogr/ogr_flatgeobuf.py +++ b/autotest/ogr/ogr_flatgeobuf.py @@ -1564,3 +1564,52 @@ def test_ogr_flatgeobuf_sql_arrow(tmp_vsimem): assert f["bar"] == "baz" assert f.GetGeometryRef().ExportToWkt() == "POINT (1 2)" f = tmp_lyr.GetNextFeature() + + +############################################################################### +# Test DATETIME_AS_STRING=YES GetArrowStream() option + + +def test_ogr_flatgeobuf_arrow_stream_numpy_datetime_as_string(tmp_vsimem): + pytest.importorskip("osgeo.gdal_array") + pytest.importorskip("numpy") + + filename = str(tmp_vsimem / "datetime_as_string.fgb") + with ogr.GetDriverByName("FlatGeoBuf").CreateDataSource(filename) as ds: + lyr = ds.CreateLayer("test") + + field = ogr.FieldDefn("datetime", ogr.OFTDateTime) + lyr.CreateField(field) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT (1 2)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetField("datetime", "2022-05-31T12:34:56.789Z") + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT (1 2)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetField("datetime", "2022-05-31T12:34:56") + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT (1 2)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetField("datetime", "2022-05-31T12:34:56+12:30") + f.SetGeometry(ogr.CreateGeometryFromWkt("POINT (1 2)")) + lyr.CreateFeature(f) + + with ogr.Open(filename) as ds: + lyr = ds.GetLayer(0) + stream = lyr.GetArrowStreamAsNumPy( + options=["USE_MASKED_ARRAYS=NO", "DATETIME_AS_STRING=YES"] + ) + batches = [batch for batch in stream] + assert 
len(batches) == 1 + batch = batches[0] + assert len(batch["datetime"]) == 4 + assert batch["datetime"][0] == b"" + assert batch["datetime"][1] == b"2022-05-31T12:34:56.789Z" + assert batch["datetime"][2] == b"2022-05-31T12:34:56" + assert batch["datetime"][3] == b"2022-05-31T12:34:56+12:30" diff --git a/autotest/ogr/ogr_gpkg.py b/autotest/ogr/ogr_gpkg.py index 78e882d94892..0654d5e07945 100755 --- a/autotest/ogr/ogr_gpkg.py +++ b/autotest/ogr/ogr_gpkg.py @@ -10766,3 +10766,76 @@ def test_gpkg_secure_delete(tmp_vsimem): with ds.ExecuteSQL("PRAGMA secure_delete") as sql_lyr: f = sql_lyr.GetNextFeature() assert f.GetField(0) == 0 + + +############################################################################### +# Test DATETIME_AS_STRING=YES GetArrowStream() option + + +def test_ogr_gpkg_arrow_stream_numpy_datetime_as_string(tmp_vsimem): + pytest.importorskip("osgeo.gdal_array") + pytest.importorskip("numpy") + + filename = str(tmp_vsimem / "datetime_as_string.gpkg") + ds = ogr.GetDriverByName("GPKG").CreateDataSource(filename) + lyr = ds.CreateLayer("test") + + field = ogr.FieldDefn("datetime", ogr.OFTDateTime) + lyr.CreateField(field) + + f = ogr.Feature(lyr.GetLayerDefn()) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetField("datetime", "2022-05-31T12:34:56.789Z") + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetField("datetime", "2022-05-31T12:34:56.000") + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetField("datetime", "2022-05-31T12:34:56.000+12:30") + lyr.CreateFeature(f) + + # Test DATETIME_AS_STRING=YES + stream = lyr.GetArrowStreamAsNumPy( + options=["USE_MASKED_ARRAYS=NO", "DATETIME_AS_STRING=YES"] + ) + batches = [batch for batch in stream] + assert len(batches) == 1 + batch = batches[0] + assert len(batch["datetime"]) == 4 + assert batch["datetime"][0] == b"" + assert batch["datetime"][1] == b"2022-05-31T12:34:56.789Z" + assert batch["datetime"][2] == b"2022-05-31T12:34:56.000" + assert 
batch["datetime"][3] == b"2022-05-31T12:34:56.000+12:30" + + # Setting a filer tests the use of the less optimized + # OGRGeoPackageTableLayer::GetNextArray() implementation + lyr.SetAttributeFilter("1 = 1") + stream = lyr.GetArrowStreamAsNumPy( + options=["USE_MASKED_ARRAYS=NO", "DATETIME_AS_STRING=YES"] + ) + lyr.SetAttributeFilter(None) + batches = [batch for batch in stream] + assert len(batches) == 1 + batch = batches[0] + assert len(batch["datetime"]) == 4 + assert batch["datetime"][0] == b"" + assert batch["datetime"][1] == b"2022-05-31T12:34:56.789Z" + assert batch["datetime"][2] == b"2022-05-31T12:34:56.000" + assert batch["datetime"][3] == b"2022-05-31T12:34:56.000+12:30" + + with ds.ExecuteSQL("SELECT * FROM test") as sql_lyr: + stream = sql_lyr.GetArrowStreamAsNumPy( + options=["USE_MASKED_ARRAYS=NO", "DATETIME_AS_STRING=YES"] + ) + batches = [batch for batch in stream] + assert len(batches) == 1 + batch = batches[0] + assert len(batch["datetime"]) == 4 + assert batch["datetime"][0] == b"" + assert batch["datetime"][1] == b"2022-05-31T12:34:56.789Z" + assert batch["datetime"][2] == b"2022-05-31T12:34:56.000" + assert batch["datetime"][3] == b"2022-05-31T12:34:56.000+12:30" diff --git a/autotest/ogr/ogr_mem.py b/autotest/ogr/ogr_mem.py index 3d26ac31a00b..c5918ea20085 100755 --- a/autotest/ogr/ogr_mem.py +++ b/autotest/ogr/ogr_mem.py @@ -979,6 +979,49 @@ def test_ogr_mem_arrow_stream_numpy(): assert len(batches) == 0 +############################################################################### +# Test DATETIME_AS_STRING=YES GetArrowStream() option + + +def test_ogr_mem_arrow_stream_numpy_datetime_as_string(): + pytest.importorskip("osgeo.gdal_array") + pytest.importorskip("numpy") + + ds = ogr.GetDriverByName("Memory").CreateDataSource("") + lyr = ds.CreateLayer("foo") + + field = ogr.FieldDefn("datetime", ogr.OFTDateTime) + lyr.CreateField(field) + + f = ogr.Feature(lyr.GetLayerDefn()) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + 
f.SetField("datetime", "2022-05-31T12:34:56.789Z") + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetField("datetime", "2022-05-31T12:34:56") + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetField("datetime", "2022-05-31T12:34:56+12:30") + lyr.CreateFeature(f) + + # Test DATETIME_AS_STRING=YES + stream = lyr.GetArrowStreamAsNumPy( + options=["USE_MASKED_ARRAYS=NO", "DATETIME_AS_STRING=YES"] + ) + batches = [batch for batch in stream] + assert len(batches) == 1 + batch = batches[0] + assert len(batch["datetime"]) == 4 + assert batch["datetime"][0] == b"" + assert batch["datetime"][1] == b"2022-05-31T12:34:56.789Z" + assert batch["datetime"][2] == b"2022-05-31T12:34:56" + assert batch["datetime"][3] == b"2022-05-31T12:34:56+12:30" + + ############################################################################### diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py index b89fc1673b1e..1608ea58c86d 100755 --- a/autotest/ogr/ogr_parquet.py +++ b/autotest/ogr/ogr_parquet.py @@ -4150,3 +4150,25 @@ def test_ogr_parquet_IsArrowSchemaSupported_arrow_15_types( success, error_msg = dst_lyr.IsArrowSchemaSupported(schema) assert not success assert error_msg == expected_error_msg + + +############################################################################### +# Test DATETIME_AS_STRING=YES GetArrowStream() option + + +def test_ogr_parquet_arrow_stream_numpy_datetime_as_string(tmp_vsimem): + pytest.importorskip("osgeo.gdal_array") + pytest.importorskip("numpy") + + with gdal.OpenEx( + "data/parquet/test.parquet", gdal.OF_VECTOR, allowed_drivers=["Parquet"] + ) as ds: + lyr = ds.GetLayer(0) + stream = lyr.GetArrowStreamAsNumPy( + options=["USE_MASKED_ARRAYS=NO", "DATETIME_AS_STRING=YES"] + ) + batches = [batch for batch in stream] + batch = batches[0] + assert ( + batch["timestamp_ms_gmt_minus_0215"][0] == b"2019-01-01T14:00:00.500-02:15" + ) diff --git a/ogr/ogrsf_frmts/adbc/ogradbclayer.cpp 
b/ogr/ogrsf_frmts/adbc/ogradbclayer.cpp index 02a7b45d7c2e..66169b486dca 100644 --- a/ogr/ogrsf_frmts/adbc/ogradbclayer.cpp +++ b/ogr/ogrsf_frmts/adbc/ogradbclayer.cpp @@ -155,8 +155,11 @@ GDALDataset *OGRADBCLayer::GetDataset() bool OGRADBCLayer::GetArrowStream(struct ArrowArrayStream *out_stream, CSLConstList papszOptions) { - if (m_poFilterGeom || m_poAttrQuery) + if (m_poFilterGeom || m_poAttrQuery || + CPLFetchBool(papszOptions, GAS_OPT_DATETIME_AS_STRING, false)) + { return OGRLayer::GetArrowStream(out_stream, papszOptions); + } if (m_stream) { diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index a52cf4e4227f..bc6453618ae3 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -5490,6 +5490,24 @@ inline bool OGRArrowLayer::UseRecordBatchBaseImplementation() const return true; } + if (m_aosArrowArrayStreamOptions.FetchBool(GAS_OPT_DATETIME_AS_STRING, + false)) + { + const int nFieldCount = m_poFeatureDefn->GetFieldCount(); + for (int i = 0; i < nFieldCount; ++i) + { + const auto poFieldDefn = m_poFeatureDefn->GetFieldDefn(i); + if (!poFieldDefn->IsIgnored() && + poFieldDefn->GetType() == OFTDateTime) + { + CPLDebug("ARROW", + "DATETIME_AS_STRING=YES not compatible of fast " + "Arrow implementation"); + return true; + } + } + } + if (EQUAL(m_aosArrowArrayStreamOptions.FetchNameValueDef( "GEOMETRY_ENCODING", ""), "WKB")) diff --git a/ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp b/ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp index fc02bb18a4db..22767210a8df 100644 --- a/ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp +++ b/ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp @@ -18,6 +18,7 @@ #include "cpl_time.h" #include "ogr_p.h" #include "ograrrowarrayhelper.h" +#include "ogrlayerarrow.h" #include "ogr_recordbatch.h" #include "ogr_flatgeobuf.h" @@ -1481,6 +1482,8 @@ int OGRFlatGeobufLayer::GetNextArrowArray(struct 
ArrowArrayStream *stream, } const GIntBig nFeatureIdxStart = m_featuresPos; + const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool( + GAS_OPT_DATETIME_AS_STRING, false); const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit(); while (iFeat < sHelper.m_nMaxBatchSize) @@ -1851,6 +1854,58 @@ int OGRFlatGeobufLayer::GetNextArrowArray(struct ArrowArrayStream *stream, offset += sizeof(double); break; + case ColumnType::DateTime: + { + if (!bDateTimeAsString) + { + if (offset + sizeof(uint32_t) > size) + { + CPLErrorInvalidSize("datetime length "); + goto error; + } + uint32_t len; + memcpy(&len, data + offset, sizeof(int32_t)); + CPL_LSBPTR32(&len); + offset += sizeof(uint32_t); + if (len > size - offset || len > 32) + { + CPLErrorInvalidSize("datetime value"); + goto error; + } + if (!isIgnored) + { + OGRField ogrField; + if (ParseDateTime( + reinterpret_cast(data + + offset), + len, &ogrField)) + { + sHelper.SetDateTime( + psArray, iFeat, brokenDown, + sHelper.m_anTZFlags[i], ogrField); + } + else + { + char str[32 + 1]; + memcpy(str, data + offset, len); + str[len] = '\0'; + if (OGRParseDate(str, &ogrField, 0)) + { + sHelper.SetDateTime( + psArray, iFeat, brokenDown, + sHelper.m_anTZFlags[i], ogrField); + } + } + } + offset += len; + break; + } + else + { + [[fallthrough]]; + } + } + case ColumnType::String: case ColumnType::Json: case ColumnType::Binary: @@ -1896,50 +1951,6 @@ int OGRFlatGeobufLayer::GetNextArrowArray(struct ArrowArrayStream *stream, offset += len; break; } - - case ColumnType::DateTime: - { - if (offset + sizeof(uint32_t) > size) - { - CPLErrorInvalidSize("datetime length "); - goto error; - } - uint32_t len; - memcpy(&len, data + offset, sizeof(int32_t)); - CPL_LSBPTR32(&len); - offset += sizeof(uint32_t); - if (len > size - offset || len > 32) - { - CPLErrorInvalidSize("datetime value"); - goto error; - } - if (!isIgnored) - { - OGRField ogrField; - if (ParseDateTime(reinterpret_cast( - data + offset), - len, &ogrField)) - { 
- sHelper.SetDateTime(psArray, iFeat, brokenDown, - sHelper.m_anTZFlags[i], - ogrField); - } - else - { - char str[32 + 1]; - memcpy(str, data + offset, len); - str[len] = '\0'; - if (OGRParseDate(str, &ogrField, 0)) - { - sHelper.SetDateTime( - psArray, iFeat, brokenDown, - sHelper.m_anTZFlags[i], ogrField); - } - } - } - offset += len; - break; - } } } } diff --git a/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.cpp b/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.cpp index e17a378e68ba..37a3de83c3c3 100644 --- a/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.cpp +++ b/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.cpp @@ -11,6 +11,7 @@ ****************************************************************************/ #include "ograrrowarrayhelper.h" +#include "ogrlayerarrow.h" #include "ogr_p.h" #include @@ -94,6 +95,8 @@ OGRArrowArrayHelper::OGRArrowArrayHelper( nTZFlagOverride = OGR_TZFLAG_UTC; } } + const bool bDateTimeAsString = + aosArrowArrayStreamOptions.FetchBool(GAS_OPT_DATETIME_AS_STRING, false); if (m_bIncludeFID) { @@ -222,6 +225,20 @@ OGRArrowArrayHelper::OGRArrowArrayHelper( } break; } + + case OFTDateTime: + { + if (!bDateTimeAsString) + { + nEltSize = sizeof(int64_t); + break; + } + else + { + [[fallthrough]]; + } + } + case OFTString: case OFTBinary: { @@ -256,12 +273,6 @@ OGRArrowArrayHelper::OGRArrowArrayHelper( break; } - case OFTDateTime: - { - nEltSize = sizeof(int64_t); - break; - } - default: break; } diff --git a/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp b/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp index dae75364e787..b51184064d50 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp +++ b/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp @@ -24,6 +24,8 @@ #include "cpl_float.h" #include "cpl_json.h" #include "cpl_time.h" + +#include #include #include #include @@ -376,6 +378,8 @@ int OGRLayer::GetArrowSchema(struct ArrowArrayStream *, { const bool bIncludeFID = CPLTestBool( m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES")); + const bool 
bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool( + GAS_OPT_DATETIME_AS_STRING, false); memset(out_schema, 0, sizeof(*out_schema)); out_schema->format = "+s"; out_schema->name = CPLStrdup(""); @@ -523,7 +527,11 @@ int OGRLayer::GetArrowSchema(struct ArrowArrayStream *, const char *pszPrefix = "tsm:"; const char *pszTZOverride = m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE"); - if (pszTZOverride && EQUAL(pszTZOverride, "unknown")) + if (bDateTimeAsString) + { + psChild->format = "u"; + } + else if (pszTZOverride && EQUAL(pszTZOverride, "unknown")) { psChild->format = CPLStrdup(pszPrefix); } @@ -1812,6 +1820,99 @@ FillDateTimeArray(struct ArrowArray *psChild, return true; } +/************************************************************************/ +/* FillDateTimeArrayAsString() */ +/************************************************************************/ + +static size_t +FillDateTimeArrayAsString(struct ArrowArray *psChild, + std::deque> &apoFeatures, + const size_t nFeatureCountLimit, + const bool bIsNullable, const int i, + const size_t nMemLimit) +{ + psChild->n_buffers = 3; + psChild->buffers = static_cast(CPLCalloc(3, sizeof(void *))); + uint8_t *pabyValidity = nullptr; + using T = uint32_t; + T *panOffsets = static_cast( + VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit))); + if (panOffsets == nullptr) + return 0; + psChild->buffers[1] = panOffsets; + + size_t nOffset = 0; + size_t nFeatCount = 0; + for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount) + { + panOffsets[iFeat] = static_cast(nOffset); + const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i); + if (IsValidField(psRawField)) + { + size_t nLen = strlen("YYYY-MM-DDTHH:MM:SS"); + if (fmodf(psRawField->Date.Second, 1.0f) != 0) + nLen += strlen(".sss"); + if (psRawField->Date.TZFlag == OGR_TZFLAG_UTC) + nLen += 1; // 'Z' + else if (psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ) + nLen += strlen("+hh:mm"); + if (nLen > nMemLimit - 
nOffset) + { + if (nFeatCount == 0) + return 0; + break; + } + nOffset += static_cast(nLen); + } + else if (bIsNullable) + { + ++psChild->null_count; + if (pabyValidity == nullptr) + { + pabyValidity = AllocValidityBitmap(nFeatureCountLimit); + psChild->buffers[0] = pabyValidity; + if (pabyValidity == nullptr) + return 0; + } + UnsetBit(pabyValidity, iFeat); + } + } + panOffsets[nFeatCount] = static_cast(nOffset); + + char *pachValues = + static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1)); + if (pachValues == nullptr) + return 0; + psChild->buffers[2] = pachValues; + + nOffset = 0; + char szBuffer[OGR_SIZEOF_ISO8601_DATETIME_BUFFER]; + OGRISO8601Format sFormat; + sFormat.ePrecision = OGRISO8601Precision::AUTO; + for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat) + { + const int nLen = + static_cast(panOffsets[iFeat + 1] - panOffsets[iFeat]); + if (nLen) + { + const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i); + int nBufSize = OGRGetISO8601DateTime(psRawField, sFormat, szBuffer); + if (nBufSize) + { + memcpy(pachValues + nOffset, szBuffer, + std::min(nLen, nBufSize)); + } + if (nBufSize < nLen) + { + memset(pachValues + nOffset + nBufSize, 0, nLen - nBufSize); + } + nOffset += nLen; + } + } + + return nFeatCount; +} + /************************************************************************/ /* GetNextArrowArray() */ /************************************************************************/ @@ -1832,6 +1933,8 @@ int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream, const bool bIncludeFID = CPLTestBool( m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES")); + const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool( + GAS_OPT_DATETIME_AS_STRING, false); int nMaxBatchSize = atoi(m_aosArrowArrayStreamOptions.FetchNameValueDef( "MAX_FEATURES_IN_BATCH", "65536")); if (nMaxBatchSize <= 0) @@ -2179,10 +2282,25 @@ int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream, case OFTDateTime: { - if 
(!FillDateTimeArray(psChild, oFeatureQueue, nFeatureCount, - bIsNullable, i, - poFieldDefn->GetTZFlag())) - goto error; + if (bDateTimeAsString) + { + const size_t nThisFeatureCount = FillDateTimeArrayAsString( + psChild, oFeatureQueue, nFeatureCount, bIsNullable, i, + nMemLimit); + if (nThisFeatureCount == 0) + { + goto error_max_mem; + } + if (nThisFeatureCount < nFeatureCount) + nFeatureCount = nThisFeatureCount; + } + else + { + if (!FillDateTimeArray(psChild, oFeatureQueue, + nFeatureCount, bIsNullable, i, + poFieldDefn->GetTZFlag())) + goto error; + } break; } } @@ -2419,6 +2537,15 @@ From OGR using the Arrow C Stream data interface tutorial. * to UTC of a OGRField::Date is only done if both the timezone indicated by * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by * this TIMEZONE option) are not unknown. + *
  • DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11. + * Whether DateTime fields should be returned as a (normally ISO-8601 + * formatted) string by drivers. The aim is to be able to handle mixed + * timezones (or timezone-naive values) in the same column. + * All drivers must honour that option, and potentially fall back to the + * OGRLayer generic implementation if they cannot (which is the case for the + * Arrow, Parquet and ADBC drivers). + * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored. + *
  • *
  • GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8). * The default is OGC, which will lead to setting * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb. @@ -2613,6 +2740,15 @@ YES.
  • * to UTC of a OGRField::Date is only done if both the timezone indicated by * OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by * this TIMEZONE option) are not unknown. + *
  • DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11. + * Whether DateTime fields should be returned as a (normally ISO-8601 + * formatted) string by drivers. The aim is to be able to handle mixed + * timezones (or timezone-naive values) in the same column. + * All drivers must honour that option, and potentially fall back to the + * OGRLayer generic implementation if they cannot (which is the case for the + * Arrow, Parquet and ADBC drivers). + * When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored. + *
  • *
  • GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8). * The default is OGC, which will lead to setting * the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb. diff --git a/ogr/ogrsf_frmts/generic/ogrlayerarrow.h b/ogr/ogrsf_frmts/generic/ogrlayerarrow.h index e8f5fbd489d1..5571a649cc8a 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayerarrow.h +++ b/ogr/ogrsf_frmts/generic/ogrlayerarrow.h @@ -26,6 +26,9 @@ constexpr const char *EXTENSION_NAME_OGC_WKB = "ogc.wkb"; constexpr const char *EXTENSION_NAME_GEOARROW_WKB = "geoarrow.wkb"; constexpr const char *EXTENSION_NAME_ARROW_JSON = "arrow.json"; +// GetArrowStream(GAS) options +constexpr const char *GAS_OPT_DATETIME_AS_STRING = "DATETIME_AS_STRING"; + std::map CPL_DLL OGRParseArrowMetadata(const char *pabyMetadata); diff --git a/ogr/ogrsf_frmts/gpkg/ogr_geopackage.h b/ogr/ogrsf_frmts/gpkg/ogr_geopackage.h index ec7432de933c..2d1422ec0155 100644 --- a/ogr/ogrsf_frmts/gpkg/ogr_geopackage.h +++ b/ogr/ogrsf_frmts/gpkg/ogr_geopackage.h @@ -87,6 +87,7 @@ struct OGRGPKGTableLayerFillArrowArray int nCountRows = 0; bool bErrorOccurred = false; bool bMemoryLimitReached = false; + bool bDateTimeAsString = false; std::string osErrorMsg{}; OGRFeatureDefn *poFeatureDefn = nullptr; OGRGeoPackageLayer *poLayer = nullptr; diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagelayer.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagelayer.cpp index 479b717edc81..7d8265310e55 100644 --- a/ogr/ogrsf_frmts/gpkg/ogrgeopackagelayer.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagelayer.cpp @@ -17,6 +17,7 @@ #include "ogr_p.h" #include "ogr_recordbatch.h" #include "ograrrowarrayhelper.h" +#include "ogrlayerarrow.h" /************************************************************************/ /* OGRGeoPackageLayer() */ @@ -550,6 +551,9 @@ int OGRGeoPackageLayer::GetNextArrowArray(struct ArrowArrayStream *stream, struct tm brokenDown; memset(&brokenDown, 0, sizeof(brokenDown)); + const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool( + 
GAS_OPT_DATETIME_AS_STRING, false); + const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit(); int iFeat = 0; while (iFeat < sHelper.m_nMaxBatchSize) @@ -845,15 +849,23 @@ int OGRGeoPackageLayer::GetNextArrowArray(struct ArrowArrayStream *stream, case OFTDateTime: { - OGRField ogrField; - if (ParseDateTimeField(hStmt, iRawField, nSqlite3ColType, - &ogrField, poFieldDefn, nFID)) + if (!bDateTimeAsString) + { + OGRField ogrField; + if (ParseDateTimeField(hStmt, iRawField, + nSqlite3ColType, &ogrField, + poFieldDefn, nFID)) + { + sHelper.SetDateTime(psArray, iFeat, brokenDown, + sHelper.m_anTZFlags[iField], + ogrField); + } + break; + } + else { - sHelper.SetDateTime(psArray, iFeat, brokenDown, - sHelper.m_anTZFlags[iField], - ogrField); + [[fallthrough]]; } - break; } case OFTString: diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp index 8fc0a3d444e3..724c2a6069dd 100644 --- a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp @@ -13,6 +13,7 @@ #include "ogr_geopackage.h" #include "ogrgeopackageutility.h" +#include "ogrlayerarrow.h" #include "ogrsqliteutility.h" #include "cpl_md5.h" #include "cpl_time.h" @@ -7820,6 +7821,7 @@ void OGR_GPKG_FillArrowArray_Step(sqlite3_context *pContext, int /*argc*/, const auto nMemLimit = psFillArrowArray->nMemLimit; const int SQLITE_MAX_FUNCTION_ARG = sqlite3_limit(psFillArrowArray->hDB, SQLITE_LIMIT_FUNCTION_ARG, -1); + const bool bDateTimeAsString = psFillArrowArray->bDateTimeAsString; begin: int iFeat; OGRArrowArrayHelper *psHelper; @@ -8171,18 +8173,25 @@ void OGR_GPKG_FillArrowArray_Step(sqlite3_context *pContext, int /*argc*/, case OFTDateTime: { - OGRField ogrField; - const auto pszTxt = reinterpret_cast( - sqlite3_value_text(argv[iCol])); - if (pszTxt != nullptr && - psFillArrowArray->poLayer->ParseDateTimeField( - pszTxt, &ogrField, poFieldDefn, nFID)) + if (!bDateTimeAsString) + { + OGRField ogrField; 
+ const auto pszTxt = reinterpret_cast( + sqlite3_value_text(argv[iCol])); + if (pszTxt != nullptr && + psFillArrowArray->poLayer->ParseDateTimeField( + pszTxt, &ogrField, poFieldDefn, nFID)) + { + psHelper->SetDateTime( + psArray, iFeat, psFillArrowArray->brokenDown, + psHelper->m_anTZFlags[iField], ogrField); + } + break; + } + else { - psHelper->SetDateTime( - psArray, iFeat, psFillArrowArray->brokenDown, - psHelper->m_anTZFlags[iField], ogrField); + [[fallthrough]]; } - break; } case OFTString: @@ -8337,6 +8346,9 @@ int OGRGeoPackageTableLayer::GetNextArrowArrayAsynchronous( m_poFillArrowArray->psHelper = std::move(psHelper); m_poFillArrowArray->nCountRows = 0; m_poFillArrowArray->bErrorOccurred = false; + m_poFillArrowArray->bDateTimeAsString = + m_aosArrowArrayStreamOptions.FetchBool(GAS_OPT_DATETIME_AS_STRING, + false); m_poFillArrowArray->poFeatureDefn = m_poFeatureDefn; m_poFillArrowArray->poLayer = this; m_poFillArrowArray->hDB = m_poDS->GetDB(); @@ -8888,6 +8900,8 @@ int OGRGeoPackageTableLayer::GetNextArrowArrayInternal( sFillArrowArray.nCountRows = 0; sFillArrowArray.bMemoryLimitReached = false; sFillArrowArray.bErrorOccurred = false; + sFillArrowArray.bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool( + GAS_OPT_DATETIME_AS_STRING, false); sFillArrowArray.poFeatureDefn = m_poFeatureDefn; sFillArrowArray.poLayer = this; sFillArrowArray.hDB = m_poDS->GetDB();