Skip to content

Commit

Permalink
Facilitate region-aggregation with inconsistent weights index (#792)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored Oct 19, 2023
1 parent f023ec3 commit 5b2f3b6
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 15 deletions.
2 changes: 2 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Release v2.0.0

- [#792](https://github.com/IAMconsortium/pyam/pull/792) Support region-aggregation when the weights index is a superset of the data index

## Highlights

- Use **ixmp4** as dependency for better integration with the IIASA Scenario Explorer database infrastructure
Expand Down
29 changes: 25 additions & 4 deletions pyam/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from itertools import compress

from pyam.index import replace_index_values
from pyam.logging import adjust_log_level
from pyam.logging import adjust_log_level, format_log_message
from pyam.str import find_depth, is_str, reduce_hierarchy
from pyam.utils import KNOWN_FUNCS, is_list_like, to_list
from pyam._compare import _compare
Expand Down Expand Up @@ -116,7 +116,10 @@ def _aggregate_region(
raise ValueError("Using weights and components in one operation not supported.")

# default subregions to all regions other than `region`
subregions = subregions or df._all_other_regions(region, variable)
if weight is None:
subregions = subregions or df._all_other_regions(region, variable)
else:
subregions = subregions or df._all_other_regions(region, [variable, weight])

if not len(subregions):
logger.info(
Expand Down Expand Up @@ -214,10 +217,28 @@ def _agg_weight(data, weight, method, drop_negative_weights):
raise ValueError("Only method 'np.sum' allowed for weighted average.")

weight = weight.droplevel(["variable", "unit"])
data_index = data.droplevel(["variable", "unit"]).index

# check that weights exist for all data rows
missing_weights = data_index.difference(weight.index)
if not missing_weights.empty:
raise ValueError(
format_log_message(
"Missing weights for the following data rows", missing_weights
)
)

if not data.droplevel(["variable", "unit"]).index.equals(weight.index):
raise ValueError("Inconsistent index between variable and weight!")
# warn if no data exists for available weights
missing_data = weight.index.difference(data_index)
if not missing_data.empty:
logger.warning(
format_log_message(
"Ignoring weights for the following missing data rows", missing_data
)
)
weight[missing_data] = np.nan

# remove (and warn) negative values from weights due to strange behavior
if drop_negative_weights is True:
if any(weight < 0):
logger.warning(
Expand Down
65 changes: 56 additions & 9 deletions tests/test_feature_aggregate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
import logging
import re

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -238,7 +238,7 @@ def test_check_aggregate_region_log(simple_df, caplog):
@pytest.mark.parametrize(
"variable",
(
("Primary Energy"),
"Primary Energy",
(["Primary Energy", "Primary Energy|Coal", "Primary Energy|Wind"]),
),
)
Expand All @@ -252,7 +252,7 @@ def test_aggregate_region_append(simple_df, variable):
@pytest.mark.parametrize(
"variable",
(
("Primary Energy"),
"Primary Energy",
(["Primary Energy", "Primary Energy|Coal", "Primary Energy|Wind"]),
),
)
Expand Down Expand Up @@ -315,7 +315,13 @@ def test_aggregate_region_with_weights(simple_df, caplog):
exp = simple_df.filter(variable=v, region="World")
assert_iamframe_equal(simple_df.aggregate_region(v, weight=w), exp)

# test that dropping negative weights works as expected

def test_aggregate_region_with_negative_weights(simple_df, caplog):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"

# dropping negative weights works as expected
neg_weights_df = simple_df.copy()
neg_weights_df._data[18] = -6
exp = simple_df.filter(variable=v, region="World", year=2010)
Expand All @@ -329,24 +335,65 @@ def test_aggregate_region_with_weights(simple_df, caplog):
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "WARNING"

# test that not dropping negative weights works as expected
# *not* dropping negative weights works as expected
exp = simple_df.filter(variable=v, region="World")
exp._data[0] = -8
assert_iamframe_equal(
neg_weights_df.aggregate_region(v, weight=w, drop_negative_weights=False), exp
)


def test_aggregate_region_with_weights_raises(simple_df):
@pytest.mark.parametrize(
"filter_arg,log_message",
(
(dict(year=2010), ""),
(dict(), "model_a scen_a reg_b 2005\n1 "),
),
)
def test_aggregate_region_with_weights_inconsistent_index(
simple_df, caplog, filter_arg, log_message
):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"

# inconsistent index of variable and weight raises an error
_df = simple_df.filter(variable=w, region="reg_b", keep=False)
with pytest.raises(ValueError, match="Inconsistent index between variable and wei"):
log_message = "\n0 " + log_message + "model_a scen_a reg_b 2010"
if simple_df.time_domain == "datetime":
time_col = " time"
log_message = log_message.replace(" 2005", "2005-06-17").replace(
" 2010", "2010-07-21"
)
else:
time_col = "year"

# missing weight row raises an error
_df = simple_df.filter(variable=w, region="reg_b", keep=False, **filter_arg)
match = r"Missing weights for the following data.*\n.*" + re.escape(log_message)
with pytest.raises(ValueError, match=match):
_df.aggregate_region(v, weight=w)

# missing data row prints a warning (data-index is a subset of weight-index)
exp = simple_df.filter(variable=v, region="World")
if not filter_arg:
exp._data[0] = 1.0
exp._data[1] = 30.0
_df = simple_df.filter(variable=v, region="reg_b", keep=False, **filter_arg)
assert_iamframe_equal(_df.aggregate_region(v, weight=w), exp)

msg = (
"Ignoring weights for the following missing data rows:\n"
f" model scenario region {time_col}" + log_message
)

idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "WARNING"


def test_aggregate_region_with_weights_raises(simple_df):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"

# using weight and method other than 'sum' raises an error
pytest.raises(ValueError, simple_df.aggregate_region, v, method="max", weight="bar")

Expand Down
2 changes: 1 addition & 1 deletion tests/test_feature_growth_rate.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,5 @@ def test_growth_rate_timeseries(x2010, rates):
def test_growth_rate_timeseries_fails(value):
"""Check that a timeseries reaching/crossing 0 raises"""

with pytest.raises(ValueError, match="Cannot compute growth rate when*."):
with pytest.raises(ValueError, match="Cannot compute growth rate when"):
growth_rate(pd.Series([1.0, value]))
2 changes: 1 addition & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_not_a_file():

def test_io_list():
# initializing with a list raises an error
match = r"Initializing from list is not supported,*."
match = "Initializing from list is not supported,"
with pytest.raises(ValueError, match=match):
IamDataFrame([1, 2])

Expand Down

0 comments on commit 5b2f3b6

Please sign in to comment.