Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for summary files beyond year 2262 #238

Merged
merged 2 commits into from
Dec 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 100 additions & 6 deletions src/fmu/ensemble/util/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import dateutil
import pandas as pd
import logging
from typing import List, Tuple

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -33,8 +34,12 @@ def date_range(start_date, end_date, freq):
Returns:
list of datetimes
"""
freq = PD_FREQ_MNEMONICS.get(freq, freq)
return pd.date_range(start_date, end_date, freq=freq)
try:
return pd.date_range(
start_date, end_date, freq=PD_FREQ_MNEMONICS.get(freq, freq)
)
except pd.errors.OutOfBoundsDatetime:
return _fallback_date_range(start_date, end_date, freq)


def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date=None):
Expand Down Expand Up @@ -125,7 +130,9 @@ def unionize_smry_dates(eclsumsdates, freq, normalize, start_date=None, end_date
return datetimes


def normalize_dates(start_date, end_date, freq):
def normalize_dates(
start_date: datetime.date, end_date: datetime.date, freq: str
) -> Tuple[datetime.date, datetime.date]:
"""
Normalize start and end date according to frequency
by extending the time range.
Expand All @@ -145,6 +152,93 @@ def normalize_dates(start_date, end_date, freq):
Return:
Tuple of normalized (start_date, end_date)
"""
freq = PD_FREQ_MNEMONICS.get(freq, freq)
offset = pd.tseries.frequencies.to_offset(freq)
return (offset.rollback(start_date).date(), offset.rollforward(end_date).date())
offset = pd.tseries.frequencies.to_offset(PD_FREQ_MNEMONICS.get(freq, freq))
try:
start_normalized = offset.rollback(start_date).date()
except pd.errors.OutOfBoundsDatetime:
# Pandas only supports datetime up to year 2262
start_normalized = _fallback_date_roll(
datetime.datetime.combine(start_date, datetime.time()), "back", freq
).date()
try:
end_normalized = offset.rollforward(end_date).date()
except pd.errors.OutOfBoundsDatetime:
# Pandas only supports datetime up to year 2262
end_normalized = _fallback_date_roll(
datetime.datetime.combine(end_date, datetime.time()), "forward", freq
).date()

return (start_normalized, end_normalized)


def _fallback_date_roll(
    rollme: datetime.datetime, direction: str, freq: str
) -> datetime.datetime:
    """Roll a datetime onto a yearly or monthly boundary without using pandas.

    Pandas represents datetimes with nanosecond accuracy and therefore does
    not support datetimes beyond year 2262. This function reimplements the
    behaviour of pandas' DateOffset.rollforward() and DateOffset.rollback()
    for monthly and yearly frequency only.

    It is meant as a fallback only, so that all pandas time offsets remain
    usable in situations where years beyond 2262 are not an issue.

    Args:
        rollme: The datetime to roll.
        direction: "back" rolls to the start of the period containing rollme,
            "forward" rolls to the start of the following period. A datetime
            sitting exactly on a period start maps to itself in both
            directions.
        freq: Either "yearly" or "monthly".

    Returns:
        A datetime at midnight on the first day of the selected year or month.

    Raises:
        ValueError: If direction is neither "back" nor "forward", or if freq
            is neither "yearly" nor "monthly".
    """
    if direction not in ("back", "forward"):
        raise ValueError(f"Unknown direction {direction}")

    if freq == "yearly":
        period_start = datetime.datetime(year=rollme.year, month=1, day=1)
        # Rolling back always lands on the period start; rolling forward only
        # does so when we are already exactly on it.
        if direction == "back" or rollme <= period_start:
            return period_start
        return datetime.datetime(year=rollme.year + 1, month=1, day=1)

    if freq == "monthly":
        period_start = datetime.datetime(
            year=rollme.year, month=rollme.month, day=1
        )
        if direction == "back" or rollme <= period_start:
            return period_start
        # December (12) wraps to January of the next year: divmod gives
        # (1, 0) for month 12 and (0, month) for every other month.
        year_carry, month_index = divmod(rollme.month, 12)
        return datetime.datetime(
            year=rollme.year + year_carry, month=month_index + 1, day=1
        )

    raise ValueError(
        "Only yearly or monthly frequencies are "
        "supported for simulations beyond year 2262"
    )


def _fallback_date_range(
    start: datetime.date, end: datetime.date, freq: str
) -> List[datetime.datetime]:
    """Generate a yearly or monthly date range without relying on pandas.

    Fallback for ranges that cannot be represented by pandas'
    datetime64[ns], which is limited to year 2262.

    The start and end are assumed to already fall on a frequency boundary.

    Args:
        start: First date of the range.
        end: Last date of the range.
        freq: Either "yearly" or "monthly".

    Returns:
        List of datetimes at midnight. A single-element list when start
        equals end, and an empty list when end is before start.

    Raises:
        ValueError: If freq is neither "yearly" nor "monthly". This is only
            reached when start is strictly before end.
    """

    def _at_midnight(day: datetime.date) -> datetime.datetime:
        return datetime.datetime.combine(day, datetime.datetime.min.time())

    if start == end:
        return [_at_midnight(start)]
    if end < start:
        return []

    if freq == "yearly":
        dates = [_at_midnight(start)]
        for year in range(start.year + 1, end.year + 1):
            dates.append(datetime.datetime(year=year, month=1, day=1))
        # Make sure the end date is included even if it is not a January 1st
        last = _at_midnight(end)
        if last != dates[-1]:
            dates.append(last)
        return dates

    if freq == "monthly":
        current = _at_midnight(start)
        last = _at_midnight(end)
        one_month = dateutil.relativedelta.relativedelta(months=1)  # type: ignore
        dates = []
        while current <= last:
            dates.append(current)
            current = current + one_month
        return dates

    raise ValueError("Unsupported frequency for datetimes beyond year 2262")
182 changes: 182 additions & 0 deletions tests/test_dates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
from datetime import datetime as dt

import pytest

from fmu.ensemble.util.dates import _fallback_date_roll, date_range

# These tests are duplicated from https://github.com/equinor/res2df/blob/master/tests/test_summary.py


@pytest.mark.parametrize(
    "rollme, direction, freq, expected",
    [
        # Rolling forward: boundary dates stay put, others move to next period
        (dt(3000, 1, 1), "forward", "yearly", dt(3000, 1, 1)),
        (dt(3000, 1, 1), "forward", "monthly", dt(3000, 1, 1)),
        (dt(3000, 1, 2), "forward", "yearly", dt(3001, 1, 1)),
        (dt(3000, 1, 2), "forward", "monthly", dt(3000, 2, 1)),
        # Rolling back: always lands on the start of the current period
        (dt(3000, 1, 1), "back", "yearly", dt(3000, 1, 1)),
        (dt(3000, 1, 1), "back", "monthly", dt(3000, 1, 1)),
        (dt(3000, 12, 31), "back", "yearly", dt(3000, 1, 1)),
        (dt(3000, 2, 2), "back", "monthly", dt(3000, 2, 1)),
        # Unsupported frequency
        pytest.param(
            dt(3000, 2, 2),
            "forward",
            "daily",
            None,
            marks=pytest.mark.xfail(raises=ValueError),
        ),
        # Unknown direction
        pytest.param(
            dt(3000, 2, 2),
            "upwards",
            "yearly",
            None,
            marks=pytest.mark.xfail(raises=ValueError),
        ),
    ],
)
def test_fallback_date_roll(rollme, direction, freq, expected):
    """Pandas date rolling does not always work for years beyond 2262.

    The fallback implementation should give the expected boundary dates so
    that the pandas limitation can be hidden from the user.
    """
    rolled = _fallback_date_roll(rollme, direction, freq)
    assert rolled == expected


@pytest.mark.parametrize(
    "start, end, freq, expected",
    [
        (
            dt(3000, 1, 1),
            dt(3002, 1, 1),
            "yearly",
            [dt(3000, 1, 1), dt(3001, 1, 1), dt(3002, 1, 1)],
        ),
        (
            dt(2999, 11, 1),
            dt(3000, 2, 1),
            "monthly",
            [dt(2999, 11, 1), dt(2999, 12, 1), dt(3000, 1, 1), dt(3000, 2, 1)],
        ),
        # Frequencies other than yearly/monthly are not supported beyond 2262
        pytest.param(
            dt(3000, 1, 1),
            dt(3000, 2, 1),
            "weekly",
            None,
            marks=pytest.mark.xfail(raises=ValueError),
        ),
        # Crossing the problematic time boundary:
        (
            dt(2260, 1, 1),
            dt(2263, 1, 1),
            "yearly",
            [dt(2260, 1, 1), dt(2261, 1, 1), dt(2262, 1, 1), dt(2263, 1, 1)],
        ),
        # Identical start and end give a single date
        (dt(3000, 1, 1), dt(3000, 1, 1), "yearly", [dt(3000, 1, 1)]),
        (dt(2000, 1, 1), dt(2000, 1, 1), "yearly", [dt(2000, 1, 1)]),
        # End before start gives an empty range
        (dt(2000, 1, 1), dt(1000, 1, 1), "yearly", []),
        (dt(3000, 1, 1), dt(2000, 1, 1), "yearly", []),
        # Start and end that are not on a yearly boundary
        (
            dt(2300, 5, 6),
            dt(2302, 3, 1),
            "yearly",
            [dt(2300, 5, 6), dt(2301, 1, 1), dt(2302, 1, 1), dt(2302, 3, 1)],
        ),
        (dt(2304, 5, 6), dt(2302, 3, 1), "yearly", []),
        (dt(2302, 3, 1), dt(2302, 3, 1), "yearly", [dt(2302, 3, 1)]),
    ],
)
def test_date_range(start, end, freq, expected):
    """Check date_range(), including dates beyond year 2262 for which
    the function _fallback_date_range() is triggered."""
    generated = date_range(start, end, freq)
    assert generated == expected
44 changes: 37 additions & 7 deletions tests/test_realization.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
"""Testing fmu-ensemble."""
# pylint: disable=protected-access

import os
import datetime
import shutil
import logging
import os
import shutil
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
import yaml
from dateutil.relativedelta import relativedelta

import pytest
from resdata.summary import Summary

import numpy as np

from .test_ensembleset import symlink_iter
from fmu import ensemble

from .test_ensembleset import symlink_iter

try:
SKIP_FMU_TOOLS = False
Expand Down Expand Up @@ -615,6 +615,36 @@ def test_singlereal_ecl(tmp="TMP"):
"FOPT" in real["unsmry--yearly"]


def test_can_import_summary_files_beyond_2262(tmpdir, monkeypatch):
    """Check that summary data dated after year 2262 can be loaded.

    Pandas is/has been eager to use datetime64[ns], which overflows in year
    2262. This verifies that the limitation is worked around for the raw,
    monthly and yearly time indices, and that requesting a weekly time index
    raises ValueError.
    """
    monkeypatch.chdir(tmpdir)

    # Two-row summary whose last date is past the datetime64[ns] limit
    rows = [
        {"DATE": datetime.date(2000, 1, 1), "FPR": 200},
        {"DATE": datetime.date(2263, 1, 1), "FPR": 1},
    ]
    smry_frame = pd.DataFrame(rows).set_index("DATE")
    res_sum = Summary.from_pandas("TESTCASE", smry_frame)

    runpath = "realization-0/iter-0"
    Path(runpath).mkdir(parents=True)

    # fwrite() can only write to cwd
    os.chdir(runpath)
    Summary.fwrite(res_sum)
    os.chdir(tmpdir)

    real = ensemble.ScratchRealization(runpath)
    real.find_files("TESTCASE.UNSMRY")

    for time_index in ["raw", "monthly", "yearly"]:
        smry = real.get_smry(column_keys="*", time_index=time_index)
        assert "2263-01-01" in str(smry)

    with pytest.raises(ValueError):
        real.get_smry(column_keys="*", time_index="weekly")


kwinkunks marked this conversation as resolved.
Show resolved Hide resolved
def test_independent_realization(tmp="TMP"):
"""Test what we are able to load a single Eclipse run
that might have nothing to do with FMU"""
Expand Down