Skip to content

Commit

Permalink
Arrow reading: generic code path (as used by GeoJSON): fix mis-handling of timezones
Browse files Browse the repository at this point in the history

Fixes
geopandas/pyogrio#487 (comment)
regarding GeoJSON
  • Loading branch information
rouault committed Oct 19, 2024
1 parent 4a92011 commit 427c8dc
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 34 deletions.
96 changes: 72 additions & 24 deletions autotest/ogr/ogr_geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -4605,7 +4605,7 @@ def test_ogr_geojson_arrow_stream_pyarrow_mixed_timezone(tmp_vsimem):


def test_ogr_geojson_arrow_stream_pyarrow_utc_plus_five(tmp_vsimem):
pytest.importorskip("pyarrow")
# pytest.importorskip("pyarrow")

filename = str(
tmp_vsimem / "test_ogr_geojson_arrow_stream_pyarrow_utc_plus_five.geojson"
Expand All @@ -4621,22 +4621,37 @@ def test_ogr_geojson_arrow_stream_pyarrow_utc_plus_five(tmp_vsimem):
lyr.CreateFeature(f)
ds = None

try:
import pyarrow # NOQA

has_pyarrow = True
except ImportError:
has_pyarrow = False
if has_pyarrow:
ds = ogr.Open(filename)
lyr = ds.GetLayer(0)
stream = lyr.GetArrowStreamAsPyArrow()
assert stream.schema.field("datetime").type.tz == "+05:00"
values = []
for batch in stream:
for x in batch.field("datetime"):
values.append(x.value)
assert values == [1653982496789, 1653986096789]

mem_ds = ogr.GetDriverByName("Memory").CreateDataSource("")
mem_lyr = mem_ds.CreateLayer("test", geom_type=ogr.wkbPoint)
ds = ogr.Open(filename)
lyr = ds.GetLayer(0)
stream = lyr.GetArrowStreamAsPyArrow()
assert stream.schema.field("datetime").type.tz == "+05:00"
values = []
for batch in stream:
for x in batch.field("datetime"):
values.append(x.value)
assert values == [1654000496789, 1654004096789]
mem_lyr.WriteArrow(lyr)

f = mem_lyr.GetNextFeature()
assert f["datetime"] == "2022/05/31 12:34:56.789+05"


###############################################################################


def test_ogr_geojson_arrow_stream_pyarrow_utc_minus_five(tmp_vsimem):
pytest.importorskip("pyarrow")

filename = str(
tmp_vsimem / "test_ogr_geojson_arrow_stream_pyarrow_utc_minus_five.geojson"
Expand All @@ -4652,22 +4667,37 @@ def test_ogr_geojson_arrow_stream_pyarrow_utc_minus_five(tmp_vsimem):
lyr.CreateFeature(f)
ds = None

try:
import pyarrow # NOQA

has_pyarrow = True
except ImportError:
has_pyarrow = False
if has_pyarrow:
ds = ogr.Open(filename)
lyr = ds.GetLayer(0)
stream = lyr.GetArrowStreamAsPyArrow()
assert stream.schema.field("datetime").type.tz == "-05:00"
values = []
for batch in stream:
for x in batch.field("datetime"):
values.append(x.value)
assert values == [1654018496789, 1654022096789]

mem_ds = ogr.GetDriverByName("Memory").CreateDataSource("")
mem_lyr = mem_ds.CreateLayer("test", geom_type=ogr.wkbPoint)
ds = ogr.Open(filename)
lyr = ds.GetLayer(0)
stream = lyr.GetArrowStreamAsPyArrow()
assert stream.schema.field("datetime").type.tz == "-05:00"
values = []
for batch in stream:
for x in batch.field("datetime"):
values.append(x.value)
assert values == [1654000496789, 1654004096789]
mem_lyr.WriteArrow(lyr)

f = mem_lyr.GetNextFeature()
assert f["datetime"] == "2022/05/31 12:34:56.789-05"


###############################################################################


def test_ogr_geojson_arrow_stream_pyarrow_unknown_timezone(tmp_vsimem):
pytest.importorskip("pyarrow")

filename = str(
tmp_vsimem / "test_ogr_geojson_arrow_stream_pyarrow_unknown_timezone.geojson"
Expand All @@ -4683,15 +4713,33 @@ def test_ogr_geojson_arrow_stream_pyarrow_unknown_timezone(tmp_vsimem):
lyr.CreateFeature(f)
ds = None

try:
import pyarrow # NOQA

has_pyarrow = True
except ImportError:
has_pyarrow = False
if has_pyarrow:
ds = ogr.Open(filename)
lyr = ds.GetLayer(0)
stream = lyr.GetArrowStreamAsPyArrow()
assert stream.schema.field("datetime").type.tz is None
values = []
for batch in stream:
for x in batch.field("datetime"):
values.append(x.value)
assert values == [1654000496789, 1654004096789]

mem_ds = ogr.GetDriverByName("Memory").CreateDataSource("")
mem_lyr = mem_ds.CreateLayer("test", geom_type=ogr.wkbPoint)
ds = ogr.Open(filename)
lyr = ds.GetLayer(0)
stream = lyr.GetArrowStreamAsPyArrow()
assert stream.schema.field("datetime").type.tz is None
values = []
for batch in stream:
for x in batch.field("datetime"):
values.append(x.value)
assert values == [1654000496789, 1654004096789]
mem_lyr.WriteArrow(lyr)

f = mem_lyr.GetNextFeature()
# We have lost the timezone info here, as there's no way in Arrow to
# have a mix of values with and without timezone in a single column
assert f["datetime"] == "2022/05/31 12:34:56.789"


###############################################################################
Expand Down
11 changes: 1 addition & 10 deletions ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1780,17 +1780,8 @@ FillDateTimeArray(struct ArrowArray *psChild,
auto nVal =
CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
(static_cast<int>(psRawField->Date.Second * 1000 + 0.5) % 1000);
if (nFieldTZFlag > OGR_TZFLAG_MIXED_TZ &&
if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
{
// Convert for psRawField->Date.TZFlag to nFieldTZFlag
const int TZOffset =
(psRawField->Date.TZFlag - nFieldTZFlag) * 15;
const int TZOffsetMS = TZOffset * 60 * 1000;
nVal -= TZOffsetMS;
}
else if (nFieldTZFlag == OGR_TZFLAG_MIXED_TZ &&
psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
{
// Convert from psRawField->Date.TZFlag to UTC
const int TZOffset =
Expand Down

0 comments on commit 427c8dc

Please sign in to comment.