Skip to content

Commit

Permalink
Support dates in time_index in smryh (#60)
Browse files Browse the repository at this point in the history
* Validate smryh sections, allowing ISO-dates

* Allow date objects in smryh input

* Support explicit dates for smryh-time_index

* Add TIME_INDEX as a mismatch-df column for smryh
  • Loading branch information
berland authored Oct 1, 2019
1 parent d2fd2c6 commit 6617aa1
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 19 deletions.
2 changes: 1 addition & 1 deletion docs/advancedusage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ like:
smryh:
- key: FOPT
histvec: FOPTH
time_index: monthly # or yearly, daily, raw or last
time_index: monthly # or yearly, daily, raw, last, or an ISO-date
This file can be loaded in Python:

Expand Down
81 changes: 75 additions & 6 deletions src/fmu/ensemble/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Observations(object):
summary vector, it can also be a time-series. Mismatches will
be computed per observation unit.
Pay attentiont to mismatch versus misfit. Here, mismatch is used
Pay attention to mismatch versus misfit. Here, mismatch is used
for individual observation units, while misfit is used as single
number for whole realizations.
Expand Down Expand Up @@ -311,15 +311,36 @@ def _realization_mismatch(self, real):
)
if obstype == "smryh":
if "time_index" in obsunit:
sim_hist = real.get_smry(
time_index=obsunit["time_index"],
column_keys=[obsunit["key"], obsunit["histvec"]],
)
if isinstance(obsunit["time_index"], str):
sim_hist = real.get_smry(
time_index=obsunit["time_index"],
column_keys=[obsunit["key"], obsunit["histvec"]],
)
elif isinstance(
obsunit["time_index"], (datetime.datetime, datetime.date)
):
# real.get_smry only allows strings or
# list of datetimes as time_index.
sim_hist = real.get_smry(
time_index=[obsunit["time_index"]],
column_keys=[obsunit["key"], obsunit["histvec"]],
)
else:
logger.error(
(
"obsunit-timeindex was not string or date object\n"
"Should not be possible, file a bug report"
)
)
logger.error(obsunit["time_index"])
logger.error(type(obsunit["time_index"]))
time_index_str = str(obsunit["time_index"])
else:
sim_hist = real.get_smry(
column_keys=[obsunit["key"], obsunit["histvec"]]
# (let get_smry() determine the possible time_index)
)
time_index_str = ""
# If empty df returned, we don't have the data for this:
if sim_hist.empty:
logger.warning(
Expand All @@ -340,6 +361,7 @@ def _realization_mismatch(self, real):
MEASERROR=measerror,
L1=sim_hist["mismatch"].abs().sum(),
L2=math.sqrt((sim_hist["mismatch"] ** 2).sum()),
TIME_INDEX=time_index_str,
)
)
if obstype == "smry":
Expand Down Expand Up @@ -437,7 +459,54 @@ def _clean_observations(self):
type(self.observations[key]),
)
self.observations.pop(key)

# Check smryh observations for validity
if "smryh" in self.observations.keys():
smryhunits = self.observations["smryh"]
if not isinstance(smryhunits, list):
logger.warning(
"smryh must consist of a list, deleting: %s", str(smryhunits)
)
del self.observations["smryh"]
for unit in smryhunits:
if not isinstance(unit, (dict, OrderedDict)):
logger.warning("smryh-units must be dicts, deleting: %s", str(unit))
del smryhunits[smryhunits.index(unit)]
continue
if not ("key" in unit and "histvec" in unit):
logger.warning(
(
"smryh units must contain both 'key' and "
"'histvec', deleting: %s",
str(unit),
)
)
del smryhunits[smryhunits.index(unit)]
continue
# If time_index is not a supported mnemonic,
# parse it to a date object
if "time_index" in unit:
if unit["time_index"] not in [
"raw",
"report",
"yearly",
"daily",
"last",
"monthly",
] and not isinstance(unit["time_index"], datetime.datetime):
try:
unit["time_index"] = dateutil.parser.isoparse(
unit["time_index"]
).date()
except (TypeError, ValueError) as exception:
logger.warning(
"Parsing date %s failed with error",
(str(unit["time_index"]), str(exception)),
)
del smryhunits[smryhunits.index(unit)]
continue
# If everything has been deleted through cleanup, delete the section
if not smryhunits:
del self.observations["smryh"]
# Check smry observations for validity
if "smry" in self.observations.keys():
# We already know that observations['smry'] is a list
Expand Down
61 changes: 49 additions & 12 deletions tests/test_observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import glob
import datetime
import dateutil
import yaml
import pandas as pd
import numpy as np
Expand Down Expand Up @@ -302,18 +303,59 @@ def test_smryh():
obs_last = Observations(
{"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "last"}]}
)
obs_isodatestr = Observations(
{"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "2003-02-01"}]}
)
obs_future = Observations(
{"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "3003-02-01"}]}
)
obs_past = Observations(
{"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "1003-02-01"}]}
)

assert obs_isodatestr
obs_isodate = Observations(
{
"smryh": [
{
"key": "FOPT",
"histvec": "FOPTH",
"time_index": dateutil.parser.isoparse("2003-02-01"),
}
]
}
)
assert obs_isodate

obs_error = Observations(
{"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "ølasjkdf"}]}
)
assert not obs_error
obs_error2 = Observations(
{"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": 4.43}]}
)
assert not obs_error2

mismatchyearly = obs_yearly.mismatch(ens)
mismatchmonthly = obs_monthly.mismatch(ens)
mismatchdaily = obs_daily.mismatch(ens)
mismatchlast = obs_last.mismatch(ens)
mismatchraw = obs_raw.mismatch(ens)
assert mismatchraw["TIME_INDEX"].unique() == ["raw"]

mismatchdate = obs_isodate.mismatch(ens)
assert "2003-02-01" in mismatchdate["TIME_INDEX"].unique()[0]

mismatchdatestr = obs_isodatestr.mismatch(ens)
# There might be a clock time included
assert "2003-02-01" in mismatchdatestr["TIME_INDEX"].unique()[0]
assert all(mismatchdate["L1"] == mismatchdatestr["L1"])

mismatchfuture = obs_future.mismatch(ens)
assert all(mismatchfuture["L1"] == mismatchlast["L1"])

mismatchpast = obs_past.mismatch(ens)
assert np.isclose(sum(mismatchpast["L2"]), 0.0)

# When only one datapoint is included, these should be identical:
assert (mismatchlast["L1"] == mismatchlast["L2"]).all()
Expand All @@ -323,16 +365,6 @@ def test_smryh():
assert mismatchyearly["L2"].sum != mismatchmonthly["L2"].sum()
assert mismatchdaily["L2"].sum != mismatchraw["L2"].sum()

with pytest.raises(ValueError):
obs_error.mismatch(ens)
with pytest.raises(TypeError):
# Improve here, this should give ValueError instead
obs_error2.mismatch(ens)

print(mismatchlast)
print(mismatchdaily)
print(obs_raw.mismatch(ens))


def test_ens_mismatch():
"""Test calculation of mismatch to ensemble data"""
Expand Down Expand Up @@ -399,8 +431,13 @@ def test_vens_mismatch():
)
assert (
(
mismatch.sort_values("REAL").reset_index(drop=True)
== obs_monthly.mismatch(ens).sort_values("REAL").reset_index(drop=True)
mismatch.sort_values("REAL")
.reset_index(drop=True)
.drop("TIME_INDEX", axis=1)
== obs_monthly.mismatch(ens)
.sort_values("REAL")
.reset_index(drop=True)
.drop("TIME_INDEX", axis=1)
)
.all()
.all()
Expand Down

0 comments on commit 6617aa1

Please sign in to comment.