From 880432f766825b270bfdbe171deb8318babec8ca Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Oct 2023 21:30:20 +0200 Subject: [PATCH 1/6] Clean-up steps --- tests/test_feature_aggregate.py | 14 +++++++++----- tests/test_feature_growth_rate.py | 2 +- tests/test_io.py | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py index 10c27e61b..b9642b7cf 100644 --- a/tests/test_feature_aggregate.py +++ b/tests/test_feature_aggregate.py @@ -1,5 +1,4 @@ import pytest -import logging import numpy as np import pandas as pd @@ -238,7 +237,7 @@ def test_check_aggregate_region_log(simple_df, caplog): @pytest.mark.parametrize( "variable", ( - ("Primary Energy"), + "Primary Energy", (["Primary Energy", "Primary Energy|Coal", "Primary Energy|Wind"]), ), ) @@ -252,7 +251,7 @@ def test_aggregate_region_append(simple_df, variable): @pytest.mark.parametrize( "variable", ( - ("Primary Energy"), + "Primary Energy", (["Primary Energy", "Primary Energy|Coal", "Primary Energy|Wind"]), ), ) @@ -315,7 +314,12 @@ def test_aggregate_region_with_weights(simple_df, caplog): exp = simple_df.filter(variable=v, region="World") assert_iamframe_equal(simple_df.aggregate_region(v, weight=w), exp) - # test that dropping negative weights works as expected +def test_aggregate_region_with_negative_weights(simple_df, caplog): + # carbon price shouldn't be summed but be weighted by emissions + v = "Price|Carbon" + w = "Emissions|CO2" + + # dropping negative weights works as expected neg_weights_df = simple_df.copy() neg_weights_df._data[18] = -6 exp = simple_df.filter(variable=v, region="World", year=2010) @@ -329,7 +333,7 @@ def test_aggregate_region_with_weights(simple_df, caplog): idx = caplog.messages.index(msg) assert caplog.records[idx].levelname == "WARNING" - # test that not dropping negative weights works as expected + # *not* dropping negative weights works as expected exp = simple_df.filter(variable=v, region="World") exp._data[0] = -8 assert_iamframe_equal( diff --git a/tests/test_feature_growth_rate.py b/tests/test_feature_growth_rate.py index fa9ea3b61..708e98bf1 100644 --- a/tests/test_feature_growth_rate.py +++ b/tests/test_feature_growth_rate.py @@ -66,5 +66,5 @@ def test_growth_rate_timeseries(x2010, rates): def test_growth_rate_timeseries_fails(value): """Check that a timeseries reaching/crossing 0 raises""" - with pytest.raises(ValueError, match="Cannot compute growth rate when*."): + with pytest.raises(ValueError, match="Cannot compute growth rate when"): growth_rate(pd.Series([1.0, value])) diff --git a/tests/test_io.py b/tests/test_io.py index c358e9531..fddf9079c 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -43,7 +43,7 @@ def test_not_a_file(): def test_io_list(): # initializing with a list raises an error - match = r"Initializing from list is not supported,*." + match = "Initializing from list is not supported," with pytest.raises(ValueError, match=match): IamDataFrame([1, 2]) From 9c1e81a601dfdc6184413669269be3e0ff6b6968 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Oct 2023 21:31:19 +0200 Subject: [PATCH 2/6] Implement error/warning for inconsistent index --- pyam/aggregation.py | 24 +++++++++++++++++++++--- tests/test_feature_aggregate.py | 27 ++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/pyam/aggregation.py b/pyam/aggregation.py index ee781b660..d779cbc48 100644 --- a/pyam/aggregation.py +++ b/pyam/aggregation.py @@ -4,7 +4,7 @@ from itertools import compress from pyam.index import replace_index_values -from pyam.logging import adjust_log_level +from pyam.logging import adjust_log_level, format_log_message from pyam.str import find_depth, is_str, reduce_hierarchy from pyam.utils import KNOWN_FUNCS, is_list_like, to_list from pyam._compare import _compare @@ -214,10 +214,28 @@ def _agg_weight(data, weight, method, drop_negative_weights): raise ValueError("Only method 'np.sum' allowed for weighted average.") weight = weight.droplevel(["variable", "unit"]) + data_index = data.droplevel(["variable", "unit"]).index - if not data.droplevel(["variable", "unit"]).index.equals(weight.index): - raise ValueError("Inconsistent index between variable and weight!") + # check that weights exist for all data rows + missing_weights = data_index.difference(weight.index) + if not missing_weights.empty: + raise ValueError( + format_log_message( + "Missing weights for the following data rows", missing_weights + ) + ) + + # warn if no data exists for available weights + missing_data = weight.index.difference(data_index) + if not missing_data.empty: + logger.warning( + format_log_message( + "Ignoring weights for the following missing data rows", missing_data + ) + ) + weight[missing_data] = np.nan + # remove (and warn) negative values from weights due to strange behavior if drop_negative_weights is True: if any(weight < 0): logger.warning( diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py index b9642b7cf..4eb547e9f 100644 --- a/tests/test_feature_aggregate.py +++ b/tests/test_feature_aggregate.py @@ -341,16 +341,37 @@ def test_aggregate_region_with_negative_weights(simple_df, caplog): ) -def test_aggregate_region_with_weights_raises(simple_df): +def test_aggregate_region_with_weights_inconsistent_index(simple_df, caplog): # carbon price shouldn't be summed but be weighted by emissions v = "Price|Carbon" w = "Emissions|CO2" - # inconsistent index of variable and weight raises an error + # missing weight row raises an error _df = simple_df.filter(variable=w, region="reg_b", keep=False) - with pytest.raises(ValueError, match="Inconsistent index between variable and wei"): + match = r"Missing weights for the following data.*\n.*\n.*\n.*scen_a reg_b 2010" + with pytest.raises(ValueError, match=match): _df.aggregate_region(v, weight=w) + # missing data row prints a warning (data-index is a subset of weight-index) + exp = simple_df.filter(variable=v, region="World") + exp._data[1] = 30. + _df = simple_df.filter(variable=v, region="reg_b", year=2010, keep=False) + assert_iamframe_equal(_df.aggregate_region(v, weight=w), exp) + + msg = ( + "Ignoring weights for the following missing data rows:\n" + " model scenario region year\n" + "0 model_a scen_a reg_b 2010" + ) + idx = caplog.messages.index(msg) + assert caplog.records[idx].levelname == "WARNING" + + +def test_aggregate_region_with_weights_raises(simple_df): + # carbon price shouldn't be summed but be weighted by emissions + v = "Price|Carbon" + w = "Emissions|CO2" + # using weight and method other than 'sum' raises an error pytest.raises(ValueError, simple_df.aggregate_region, v, method="max", weight="bar") From bacab9d0459cc57899e3ddc69c433ae6fe6ccfd0 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Oct 2023 21:31:26 +0200 Subject: [PATCH 3/6] Add to release notes --- RELEASE_NOTES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index e7d85bcce..73157d07b 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,7 @@ # Release v2.0.0 +- [#789](https://github.com/IAMconsortium/pyam/pull/789) Support region-aggregation with weights-index >> data-index + ## Highlights - Use **ixmp4** as dependency for better integration with the IIASA Scenario Explorer database infrastructure From 1042938b75418c527709037d91b550a0482535d2 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Oct 2023 22:30:35 +0200 Subject: [PATCH 4/6] Extend tests to make sure that method works with same/different region index inconsistency --- pyam/aggregation.py | 5 ++++- tests/test_feature_aggregate.py | 34 ++++++++++++++++++++++++++------- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/pyam/aggregation.py b/pyam/aggregation.py index d779cbc48..a02c02818 100644 --- a/pyam/aggregation.py +++ b/pyam/aggregation.py @@ -116,7 +116,10 @@ def _aggregate_region( raise ValueError("Using weights and components in one operation not supported.") # default subregions to all regions other than `region` - subregions = subregions or df._all_other_regions(region, variable) + if weight is None: + subregions = subregions or df._all_other_regions(region, variable) + else: + subregions = subregions or df._all_other_regions(region, [variable, weight]) if not len(subregions): logger.info( diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py index 4eb547e9f..74b6ebc60 100644 --- a/tests/test_feature_aggregate.py +++ b/tests/test_feature_aggregate.py @@ -1,4 +1,5 @@ import pytest +import re import numpy as np import pandas as pd @@ -314,6 +315,7 @@ def test_aggregate_region_with_weights(simple_df, caplog): exp = simple_df.filter(variable=v, region="World") assert_iamframe_equal(simple_df.aggregate_region(v, weight=w), exp) + def test_aggregate_region_with_negative_weights(simple_df, caplog): # carbon price shouldn't be summed but be weighted by emissions v = "Price|Carbon" @@ -341,28 +343,46 @@ def test_aggregate_region_with_negative_weights(simple_df, caplog): ) -def test_aggregate_region_with_weights_inconsistent_index(simple_df, caplog): +@pytest.mark.parametrize( + "filter_arg,log_message", + ( + (dict(year=2010), ""), + (dict(), "model_a scen_a reg_b 2005\n1 "), + ), +) +def test_aggregate_region_with_weights_inconsistent_index( + simple_df, caplog, filter_arg, log_message +): # carbon price shouldn't be summed but be weighted by emissions v = "Price|Carbon" w = "Emissions|CO2" + log_message = "\n0 " + log_message + "model_a scen_a reg_b 2010" + if simple_df.time_domain == "datetime": + time_col = " time" + log_message = log_message.replace("2005", "2005-6-17").replace(" 2010", "2010-07-21") + else: + time_col = "year" + # missing weight row raises an error - _df = simple_df.filter(variable=w, region="reg_b", keep=False) - match = r"Missing weights for the following data.*\n.*\n.*\n.*scen_a reg_b 2010" + _df = simple_df.filter(variable=w, region="reg_b", keep=False, **filter_arg) + match = r"Missing weights for the following data.*\n.*" + re.escape(log_message) with pytest.raises(ValueError, match=match): _df.aggregate_region(v, weight=w) # missing data row prints a warning (data-index is a subset of weight-index) exp = simple_df.filter(variable=v, region="World") - exp._data[1] = 30. - _df = simple_df.filter(variable=v, region="reg_b", year=2010, keep=False) + if not filter_arg: + exp._data[0] = 1. + exp._data[1] = 30.0 + _df = simple_df.filter(variable=v, region="reg_b", keep=False, **filter_arg) assert_iamframe_equal(_df.aggregate_region(v, weight=w), exp) msg = ( "Ignoring weights for the following missing data rows:\n" - " model scenario region year\n" - "0 model_a scen_a reg_b 2010" + f" model scenario region {time_col}" + log_message ) + idx = caplog.messages.index(msg) assert caplog.records[idx].levelname == "WARNING" From 6d1170064f3da6ca2a53aae87ac5034b34ac1c23 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Tue, 17 Oct 2023 22:36:40 +0200 Subject: [PATCH 5/6] Fix the log message testing --- tests/test_feature_aggregate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py index 74b6ebc60..54f75ba9a 100644 --- a/tests/test_feature_aggregate.py +++ b/tests/test_feature_aggregate.py @@ -360,7 +360,9 @@ def test_aggregate_region_with_weights_inconsistent_index( log_message = "\n0 " + log_message + "model_a scen_a reg_b 2010" if simple_df.time_domain == "datetime": time_col = " time" - log_message = log_message.replace("2005", "2005-6-17").replace(" 2010", "2010-07-21") + log_message = log_message.replace(" 2005", "2005-06-17").replace( + " 2010", "2010-07-21" + ) else: time_col = "year" @@ -373,7 +375,7 @@ def test_aggregate_region_with_weights_inconsistent_index( # missing data row prints a warning (data-index is a subset of weight-index) exp = simple_df.filter(variable=v, region="World") if not filter_arg: - exp._data[0] = 1. + exp._data[0] = 1.0 exp._data[1] = 30.0 _df = simple_df.filter(variable=v, region="reg_b", keep=False, **filter_arg) assert_iamframe_equal(_df.aggregate_region(v, weight=w), exp) From 93e39a6eb13be02841168624ce06badc3e4e6400 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Wed, 18 Oct 2023 07:41:54 +0200 Subject: [PATCH 6/6] Fix release notes reference --- RELEASE_NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 73157d07b..d373650f1 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,6 +1,6 @@ # Release v2.0.0 -- [#789](https://github.com/IAMconsortium/pyam/pull/789) Support region-aggregation with weights-index >> data-index +- [#792](https://github.com/IAMconsortium/pyam/pull/792) Support region-aggregation with weights-index >> data-index ## Highlights