Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Facilitate region-aggregation with inconsistent model scenario region time #792

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Release v2.0.0

- [#792](https://github.com/IAMconsortium/pyam/pull/792) Support region-aggregation when the weights-index is a superset of the data-index

## Highlights

- Use **ixmp4** as dependency for better integration with the IIASA Scenario Explorer database infrastructure
Expand Down
29 changes: 25 additions & 4 deletions pyam/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from itertools import compress

from pyam.index import replace_index_values
from pyam.logging import adjust_log_level
from pyam.logging import adjust_log_level, format_log_message
from pyam.str import find_depth, is_str, reduce_hierarchy
from pyam.utils import KNOWN_FUNCS, is_list_like, to_list
from pyam._compare import _compare
Expand Down Expand Up @@ -116,7 +116,10 @@ def _aggregate_region(
raise ValueError("Using weights and components in one operation not supported.")

# default subregions to all regions other than `region`
subregions = subregions or df._all_other_regions(region, variable)
if weight is None:
subregions = subregions or df._all_other_regions(region, variable)
else:
subregions = subregions or df._all_other_regions(region, [variable, weight])

if not len(subregions):
logger.info(
Expand Down Expand Up @@ -214,10 +217,28 @@ def _agg_weight(data, weight, method, drop_negative_weights):
raise ValueError("Only method 'np.sum' allowed for weighted average.")

weight = weight.droplevel(["variable", "unit"])
data_index = data.droplevel(["variable", "unit"]).index

# check that weights exist for all data rows
missing_weights = data_index.difference(weight.index)
if not missing_weights.empty:
raise ValueError(
format_log_message(
"Missing weights for the following data rows", missing_weights
)
)

if not data.droplevel(["variable", "unit"]).index.equals(weight.index):
raise ValueError("Inconsistent index between variable and weight!")
# warn if no data exists for available weights
missing_data = weight.index.difference(data_index)
if not missing_data.empty:
logger.warning(
format_log_message(
"Ignoring weights for the following missing data rows", missing_data
)
)
weight[missing_data] = np.nan

# remove (and warn) negative values from weights due to strange behavior
if drop_negative_weights is True:
if any(weight < 0):
logger.warning(
Expand Down
65 changes: 56 additions & 9 deletions tests/test_feature_aggregate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
import logging
import re

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -238,7 +238,7 @@ def test_check_aggregate_region_log(simple_df, caplog):
@pytest.mark.parametrize(
"variable",
(
("Primary Energy"),
"Primary Energy",
(["Primary Energy", "Primary Energy|Coal", "Primary Energy|Wind"]),
),
)
Expand All @@ -252,7 +252,7 @@ def test_aggregate_region_append(simple_df, variable):
@pytest.mark.parametrize(
"variable",
(
("Primary Energy"),
"Primary Energy",
(["Primary Energy", "Primary Energy|Coal", "Primary Energy|Wind"]),
),
)
Expand Down Expand Up @@ -315,7 +315,13 @@ def test_aggregate_region_with_weights(simple_df, caplog):
exp = simple_df.filter(variable=v, region="World")
assert_iamframe_equal(simple_df.aggregate_region(v, weight=w), exp)

# test that dropping negative weights works as expected

def test_aggregate_region_with_negative_weights(simple_df, caplog):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"

# dropping negative weights works as expected
neg_weights_df = simple_df.copy()
neg_weights_df._data[18] = -6
exp = simple_df.filter(variable=v, region="World", year=2010)
Expand All @@ -329,24 +335,65 @@ def test_aggregate_region_with_weights(simple_df, caplog):
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "WARNING"

# test that not dropping negative weights works as expected
# *not* dropping negative weights works as expected
exp = simple_df.filter(variable=v, region="World")
exp._data[0] = -8
assert_iamframe_equal(
neg_weights_df.aggregate_region(v, weight=w, drop_negative_weights=False), exp
)


def test_aggregate_region_with_weights_raises(simple_df):
@pytest.mark.parametrize(
"filter_arg,log_message",
(
(dict(year=2010), ""),
(dict(), "model_a scen_a reg_b 2005\n1 "),
),
)
def test_aggregate_region_with_weights_inconsistent_index(
simple_df, caplog, filter_arg, log_message
):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"

# inconsistent index of variable and weight raises an error
_df = simple_df.filter(variable=w, region="reg_b", keep=False)
with pytest.raises(ValueError, match="Inconsistent index between variable and wei"):
log_message = "\n0 " + log_message + "model_a scen_a reg_b 2010"
if simple_df.time_domain == "datetime":
time_col = " time"
log_message = log_message.replace(" 2005", "2005-06-17").replace(
" 2010", "2010-07-21"
)
else:
time_col = "year"

# missing weight row raises an error
_df = simple_df.filter(variable=w, region="reg_b", keep=False, **filter_arg)
match = r"Missing weights for the following data.*\n.*" + re.escape(log_message)
with pytest.raises(ValueError, match=match):
_df.aggregate_region(v, weight=w)

# missing data row prints a warning (data-index is a subset of weight-index)
exp = simple_df.filter(variable=v, region="World")
if not filter_arg:
exp._data[0] = 1.0
exp._data[1] = 30.0
_df = simple_df.filter(variable=v, region="reg_b", keep=False, **filter_arg)
assert_iamframe_equal(_df.aggregate_region(v, weight=w), exp)

msg = (
"Ignoring weights for the following missing data rows:\n"
f" model scenario region {time_col}" + log_message
)

idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "WARNING"


def test_aggregate_region_with_weights_raises(simple_df):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"

# using weight and method other than 'sum' raises an error
pytest.raises(ValueError, simple_df.aggregate_region, v, method="max", weight="bar")

Expand Down
2 changes: 1 addition & 1 deletion tests/test_feature_growth_rate.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,5 @@ def test_growth_rate_timeseries(x2010, rates):
def test_growth_rate_timeseries_fails(value):
"""Check that a timeseries reaching/crossing 0 raises"""

with pytest.raises(ValueError, match="Cannot compute growth rate when*."):
with pytest.raises(ValueError, match="Cannot compute growth rate when"):
growth_rate(pd.Series([1.0, value]))
2 changes: 1 addition & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_not_a_file():

def test_io_list():
# initializing with a list raises an error
match = r"Initializing from list is not supported,*."
match = "Initializing from list is not supported,"
with pytest.raises(ValueError, match=match):
IamDataFrame([1, 2])

Expand Down
Loading