Skip to content

Commit

Permalink
Merge pull request #298 from openscm/future-warning-pd
Browse files Browse the repository at this point in the history
Fix deprecation warnings
  • Loading branch information
znicholls authored Jan 29, 2024
2 parents 7813392 + ea5bcb5 commit de09428
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 45 deletions.
3 changes: 3 additions & 0 deletions changelog/298.improvement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Update to avoid hitting DeprecationWarning in pandas and seaborn

This should reduce the number of warnings that appear when doing common operations.
1 change: 1 addition & 0 deletions changelog/298.trivial.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Lazy load pyam
14 changes: 1 addition & 13 deletions docs/source/notebooks/plotting-with-seaborn.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.14.5
# jupytext_version: 1.15.2
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
Expand All @@ -23,7 +23,6 @@
# more than the most basic plots.

# %%

import matplotlib.pyplot as plt
import seaborn as sns

Expand All @@ -36,7 +35,6 @@
# For this notebook we use the RCMIP radiative forcings, available at rcmip.org.

# %%

rcmip_db = ScmRun("rcmip-radiative-forcing-annual-means-v4-0-0.csv")
rcmip_db.head()

Expand All @@ -46,7 +44,6 @@
# For the most common plotting patterns, we provide a very simple `lineplot` method in `ScmRun`.

# %%

out = rcmip_db.filter(variable="Effective Radiative Forcing").lineplot()
out

Expand All @@ -60,7 +57,6 @@
# specify the order to display the scenarios in.

# %%

ax = plt.figure(figsize=(16, 9)).add_subplot(111)
rcmip_db.filter(variable="Effective Radiative Forcing").lineplot(
ax=ax,
Expand All @@ -82,7 +78,6 @@
print(rcmip_db.lineplot.__doc__)

# %%

fig, axes = plt.subplots(figsize=(16, 9), nrows=2, ncols=2)

pdb = rcmip_db.filter(variable="Effective Radiative Forcing")
Expand Down Expand Up @@ -113,11 +108,9 @@
# These same options can also be passed to the `timeseries` and `long_data` methods.

# %%

rcmip_db.timeseries(time_axis="year-month")

# %%

rcmip_db.long_data(time_axis="days since 1970-01-01")

# %% [markdown]
Expand Down Expand Up @@ -146,7 +139,6 @@
vars_to_plot

# %%

seaborn_df = rcmip_db.filter(variable=vars_to_plot).long_data()
seaborn_df.head()

Expand All @@ -155,7 +147,6 @@
# [seaborn.relplot](https://seaborn.pydata.org/generated/seaborn.relplot.html).

# %%

sns.relplot(
data=seaborn_df,
x="time",
Expand All @@ -178,17 +169,14 @@
# different scenarios. In such a case we can reshape the data using pandas before using seaborn.

# %%

ts = rcmip_db.filter(variable=vars_to_plot[:4]).timeseries()
ts.head()

# %%

ts_reshaped = ts.unstack("variable").stack("time").reset_index()
ts_reshaped.head()

# %%

sns.pairplot(
ts_reshaped,
hue="scenario",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ xarray = "*"
nc-time-axis = { version = ">=1.2.0", optional = true }
typing-extensions = "*"
matplotlib = { version = "^3.7.1", optional = true }
seaborn = { version = "*", optional = true }
seaborn = { version = ">=0.12.0", optional = true }
netCDF4 = { version = "*", optional = true }
openpyxl = { version = "*", optional = true }
xlrd = { version = "*", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion src/scmdata/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def lineplot(self, time_axis=None, **kwargs): # pragma: no cover
if "scenario" in self.meta_attributes:
kwargs.setdefault("hue", "scenario")

kwargs.setdefault("ci", "sd")
kwargs.setdefault("errorbar", "sd")
kwargs.setdefault("estimator", np.median)

ax = sns.lineplot(data=plt_df, **kwargs)
Expand Down
23 changes: 17 additions & 6 deletions src/scmdata/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
from .offsets import generate_range, to_offset
from .ops import inject_ops_methods
from .plotting import inject_plotting_methods
from .pyam_compat import IamDataFrame, LongDatetimeIamDataFrame
from .time import _TARGET_DTYPE, TimePoints, TimeseriesConverter
from .units import UnitConverter

Expand All @@ -75,6 +74,8 @@

from scmdata.groupby import RunGroupBy

from .pyam_compat import LongDatetimeIamDataFrame

P = ParamSpec("P")


Expand Down Expand Up @@ -513,6 +514,9 @@ def _init_timeseries(
copy_data: bool = False,
**kwargs: Any,
) -> None:
# Lazy load
from .pyam_compat import IamDataFrame

if isinstance(data, np.ndarray):
if columns is None:
raise ValueError("`columns` argument is required")
Expand Down Expand Up @@ -871,7 +875,7 @@ def timeseries(
raise NonUniqueMetadataError(_meta)

if time_axis is None:
columns = self._time_points.to_index()
columns = self._time_points.to_index().infer_objects()
elif time_axis == "year":
columns = self._time_points.years()
elif time_axis == "year-month":
Expand Down Expand Up @@ -902,8 +906,11 @@ def calc_seconds(x):
if len(np.unique(columns)) != len(columns):
raise ValueError(f"Ambiguous time values with time_axis = '{time_axis}'")

df.columns = pd.Index(columns, name="time")
df.index = pd.MultiIndex.from_frame(_meta)
if isinstance(columns, pd.Index):
df.columns = columns
else:
df.columns = pd.Index(columns, name="time")

if drop_all_nan_times:
df = df.dropna(how="all", axis="columns")
Expand Down Expand Up @@ -2366,6 +2373,9 @@ def to_iamdataframe(self) -> LongDatetimeIamDataFrame: # pragma: no cover
ImportError
If `pyam <https://github.com/IAMconsortium/pyam>`_ is not installed
"""
# Lazy load
from .pyam_compat import LongDatetimeIamDataFrame

if LongDatetimeIamDataFrame is None:
raise ImportError(
"pyam is not installed. Features involving IamDataFrame are unavailable"
Expand Down Expand Up @@ -2617,9 +2627,10 @@ def run_append( # noqa: PLR0912, PLR0915
ret._df = pd.concat([ret._df, *to_join_dfs], axis="columns").sort_index()
ret._time_points = TimePoints(ret._df.index.values)
ret._df.index = ret._time_points.to_index()
ret._meta = pd.MultiIndex.from_frame(
pd.concat([ret._meta.to_frame(), *to_join_metas]).astype("category")
)
if not all(m.empty for m in to_join_metas):
ret._meta = pd.MultiIndex.from_frame(
pd.concat([ret._meta.to_frame(), *to_join_metas]).astype("category")
)

if ret._duplicated_meta():
if overlapping_times and duplicate_msg:
Expand Down
17 changes: 5 additions & 12 deletions tests/integration/test_plotting_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def test_plumeplot_pre_calculated_no_plume_for_one_no_median_for_other_different
style_var="climate_model",
)

assert len(record) == 3
assert len(record) == 3, record
assert (
record[0].message.args[0]
== "Quantile 0.05 not available for a_scenario a_model"
Expand Down Expand Up @@ -202,19 +202,12 @@ def test_plumeplot_non_unique_lines(plumeplot_scmrun):

error_msg = re.escape(
"More than one timeseries for "
"quantile: {}, "
"scenario: {}, "
"variable: {}.\n"
f"quantile: {quantile}, "
f"scenario: {scenario}, "
f"variable: {variable}.\n"
"Please process your data to create unique quantile timeseries "
"before calling :meth:`plumeplot`.\n"
"Found: {}".format(
quantile,
scenario,
variable,
summary_stats.filter(
quantile=quantile, scenario=scenario, variable=variable
),
)
f"Found: {summary_stats.filter(quantile=quantile, scenario=scenario, variable=variable)}"
)
with pytest.raises(ValueError, match=error_msg):
summary_stats.plumeplot(pre_calculated=True)
Expand Down
9 changes: 7 additions & 2 deletions tests/unit/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,12 @@ def test_lineplot(mock_long_data, mock_seaborn_lineplot, scm_run):
mock_long_data.assert_called_with(time_axis="year")

mock_seaborn_lineplot.assert_called_with(
ci="sd", data=trv, estimator=np.median, hue="scenario", x="time", y="value"
errorbar="sd",
data=trv,
estimator=np.median,
hue="scenario",
x="time",
y="value",
)


Expand All @@ -85,7 +90,7 @@ def test_lineplot_kwargs(mock_long_data, mock_seaborn_lineplot, scm_run):
"x": "x",
"y": "y",
"hue": "hue",
"ci": "ci",
"errorbar": "errorbar",
"estimator": "estimator",
}
trv = "test long_data return value"
Expand Down
7 changes: 0 additions & 7 deletions tests/unit/test_pyam_compat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import re
from unittest import mock

import pandas as pd
import pytest
Expand Down Expand Up @@ -28,9 +27,3 @@ def test_to_int_value_error(test_iam_df):

with pytest.raises(ValueError, match=error_msg):
LongDatetimeIamDataFrame(idf)


@mock.patch("scmdata.run.LongDatetimeIamDataFrame", None)
def test_pyam_missing(scm_run):
with pytest.raises(ImportError):
scm_run.to_iamdataframe()
6 changes: 3 additions & 3 deletions tests/unit/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2031,7 +2031,7 @@ def test_append_duplicate_times(test_append_scm_runs, duplicate_msg):
"Duplicate time points detected, the output will be the average of "
"the duplicates. Set `duplicate_msg=False` to silence this message."
)
assert len(mock_warn_taking_average) == 1
assert len(mock_warn_taking_average) == 1, mock_warn_taking_average
assert str(mock_warn_taking_average[0].message) == warn_msg
else:
assert not mock_warn_taking_average
Expand All @@ -2050,7 +2050,7 @@ def test_append_doesnt_warn_if_continuous_times(test_append_scm_runs):
with warnings.catch_warnings(record=True) as mock_warn_taking_average:
base.append(other)

assert len(mock_warn_taking_average) == 0
assert len(mock_warn_taking_average) == 0, mock_warn_taking_average


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand Down Expand Up @@ -3571,7 +3571,7 @@ def test_lineplot_time_axis(scm_run, time_axis, mod_func):
x="time",
y="value",
estimator=np.median,
ci="sd",
errorbar="sd",
hue="scenario",
other_kwarg="value",
data=mock_return,
Expand Down

0 comments on commit de09428

Please sign in to comment.