Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Refactor tests for region-aggregation with weights" #788

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions pyam/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _aggregate(df, variable, components=None, method="sum"):
)

mapping = {}
msg = "Cannot aggregate variable '{}' because it has no components."
msg = "Cannot aggregate variable '{}' because it has no components!"
# if single variable
if is_str(variable):
# default components to all variables one level below `variable`
Expand Down Expand Up @@ -109,19 +109,19 @@ def _aggregate_region(
"""Internal implementation for aggregating data over subregions"""
if not is_str(variable) and components is not False:
raise ValueError(
"Aggregating by list of variables with components is not supported."
"Aggregating by list of variables with components is not supported!"
)

if weight is not None and components is not False:
raise ValueError("Using weights and components in one operation not supported.")
raise ValueError("Using weights and components in one operation not supported!")

# default subregions to all regions other than `region`
subregions = subregions or df._all_other_regions(region, variable)

if not len(subregions):
logger.info(
f"Cannot aggregate variable '{variable}' to '{region}' "
"because it does not exist in any subregion."
"because it does not exist in any subregion!"
)
return

Expand All @@ -131,7 +131,7 @@ def _aggregate_region(
if weight is None:
if drop_negative_weights is False:
raise ValueError(
"Dropping negative weights can only be used with `weights`."
"Dropping negative weights can only be used with `weights`!"
)

_data = _group_and_agg(subregion_df._data[rows], "region", method=method)
Expand Down Expand Up @@ -208,7 +208,7 @@ def _agg_weight(data, weight, method, drop_negative_weights):

# only summation allowed with weights
if method not in ["sum", np.sum]:
raise ValueError("Only method 'np.sum' allowed for weighted average.")
raise ValueError("Only method 'np.sum' allowed for weighted average!")

weight = weight.droplevel(["variable", "unit"])

Expand All @@ -218,11 +218,12 @@ def _agg_weight(data, weight, method, drop_negative_weights):
if drop_negative_weights is True:
if any(weight < 0):
logger.warning(
"Some weights are negative. Data weighted by negative values will be "
"dropped. To use both positive and negative weights, "
"Some of the weights are negative. "
"All data weighted by negative values will be dropped. "
"To apply both positive and negative weights to the data, "
"please use the keyword argument `drop_negative_weights=False`."
)
# drop negative weights
# Drop negative weights
weight[weight < 0] = None

col1 = data.index.names.difference(["region"])
Expand All @@ -241,4 +242,4 @@ def _get_method_func(method):
return KNOWN_FUNCS[method]

# raise error if `method` is a string but not in dict of known methods
raise ValueError(f"Unknown method: {method}")
raise ValueError(f"'{method}' is not a known method!")
10 changes: 5 additions & 5 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,7 @@ def aggregate_region(
"""Aggregate timeseries data by subregions.

This function allows to add variable sub-categories that are only
defined at the `region` level by setting `components=True`.
defined at the `region` level by setting `components=True`

Parameters
----------
Expand Down Expand Up @@ -1589,7 +1589,7 @@ def check_aggregate_region(
# filter and groupby data, use `pd.Series.align` for matching index
rows = self._apply_filters(region=region, variable=variable)
if not rows.any():
logger.info(f"Variable '{variable}' does not exist in region '{region}'.")
logger.info(f"Variable '{variable}' does not exist in region '{region}'!")
return

df_region, df_subregions = _group_and_agg(self._data[rows], "region").align(
Expand Down Expand Up @@ -1630,7 +1630,7 @@ def aggregate_time(
method="sum",
append=False,
):
"""Aggregate timeseries data by subannual time resolution.
"""Aggregate timeseries data by subannual time resolution

Parameters
----------
Expand Down Expand Up @@ -1671,7 +1671,7 @@ def downscale_region(
weight=None,
append=False,
):
"""Downscale timeseries data to a number of subregions.
"""Downscale timeseries data to a number of subregions

Parameters
----------
Expand Down Expand Up @@ -1742,7 +1742,7 @@ def _get_cols(self, cols):
return META_IDX + cols + self.extra_cols

def check_internal_consistency(self, components=False, **kwargs):
"""Check whether a scenario ensemble is internally consistent.
"""Check whether a scenario ensemble is internally consistent

We check that all variables are equal to the sum of their sectoral
components and that all the regions add up to the World total. If
Expand Down
103 changes: 61 additions & 42 deletions tests/test_feature_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,20 @@
columns=LONG_IDX + ["value"],
)

# Test fixture containing one negative weight value (Emissions|CO2 in
# reg_a, 2005 = -4.0) so weighted region-aggregation of Price|Carbon can
# exercise the drop-negative-weights behavior.
_NEG_WEIGHT_ROWS = [
    ("model_a", "scen_a", "reg_a", "Emissions|CO2", "EJ/yr", 2005, -4.0),
    ("model_a", "scen_a", "reg_a", "Emissions|CO2", "EJ/yr", 2010, 5.0),
    ("model_a", "scen_a", "reg_b", "Emissions|CO2", "EJ/yr", 2005, 2.0),
    ("model_a", "scen_a", "reg_b", "Emissions|CO2", "EJ/yr", 2010, 3.0),
    ("model_a", "scen_a", "reg_a", "Price|Carbon", "USD/tCO2", 2005, 6.0),
    ("model_a", "scen_a", "reg_a", "Price|Carbon", "USD/tCO2", 2010, 6.0),
    ("model_a", "scen_a", "reg_b", "Price|Carbon", "USD/tCO2", 2005, 3.0),
    ("model_a", "scen_a", "reg_b", "Price|Carbon", "USD/tCO2", 2010, 4.0),
]
NEG_WEIGHTS_DF = pd.DataFrame(
    [list(row) for row in _NEG_WEIGHT_ROWS], columns=LONG_IDX + ["value"]
)


@pytest.mark.parametrize(
"variable,data",
Expand Down Expand Up @@ -98,7 +112,8 @@ def test_check_aggregate_top_level(simple_df):


@pytest.mark.parametrize(
"variable", ("Primary Energy", (["Primary Energy", "Emissions|CO2"]))
"variable",
(("Primary Energy"), (["Primary Energy", "Emissions|CO2"])),
)
def test_aggregate_append(simple_df, variable):
# remove `variable`, do aggregate and append, check equality to original
Expand Down Expand Up @@ -167,6 +182,7 @@ def test_aggregate_skip_intermediate(recursive_df):
)
def test_aggregate_empty(test_df, variable, append, caplog):
"""Check for performing an "empty" aggregation"""
caplog.set_level(logging.INFO, logger="pyam.aggregation")

if append:
# with `append=True`, the instance is unchanged
Expand All @@ -177,7 +193,7 @@ def test_aggregate_empty(test_df, variable, append, caplog):
# with `append=False` (default), an empty instance is returned
assert test_df.aggregate(variable).empty

msg = f"Cannot aggregate variable '{variable}' because it has no components."
msg = f"Cannot aggregate variable '{variable}' because it has no components!"
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "INFO"

Expand All @@ -196,7 +212,7 @@ def test_aggregate_components_as_dict(simple_df):
@pytest.mark.parametrize(
"variable",
(
"Primary Energy",
("Primary Energy"),
(["Primary Energy", "Primary Energy|Coal", "Primary Energy|Wind"]),
),
)
Expand Down Expand Up @@ -225,12 +241,13 @@ def test_check_aggregate_region(simple_df):

def test_check_aggregate_region_log(simple_df, caplog):
# verify that `check_aggregate_region()` writes log on empty assertion
caplog.set_level(logging.INFO, logger="pyam.core")
(
simple_df.filter(
variable="Primary Energy", region="World", keep=False
).check_aggregate_region("Primary Energy")
)
msg = "Variable 'Primary Energy' does not exist in region 'World'."
msg = "Variable 'Primary Energy' does not exist in region 'World'!"
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "INFO"

Expand All @@ -243,7 +260,7 @@ def test_check_aggregate_region_log(simple_df, caplog):
),
)
def test_aggregate_region_append(simple_df, variable):
# remove `variable`, aggregate and append, check equality to original
# remove `variable`, do aggregate and append, check equality to original
_df = simple_df.filter(variable=variable, region="World", keep=False)
_df.aggregate_region(variable, append=True)
assert_iamframe_equal(_df, simple_df)
Expand Down Expand Up @@ -304,66 +321,67 @@ def test_aggregate_region_with_components(simple_df):
assert _df.check_aggregate_region(v, components=["foo"]) is None


def test_aggregate_region_with_weights(simple_df, caplog):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"
assert simple_df.check_aggregate_region(v) is not None
assert simple_df.check_aggregate_region(v, weight=w) is None
def test_agg_weight():
variable = "Price|Carbon"
weight = "Emissions|CO2"
# negative weights should be dropped on default
obs_1 = IamDataFrame(NEG_WEIGHTS_DF).aggregate_region(variable, weight=weight)._data
exp_1 = np.array([5.25])
np.testing.assert_array_equal(obs_1.values, exp_1)

# test the full dataset
exp = simple_df.filter(variable=v, region="World")
assert_iamframe_equal(simple_df.aggregate_region(v, weight=w), exp)

# test that dropping negative weights works as expected
neg_weights_df = simple_df.copy()
neg_weights_df._data[18] = -6
exp = simple_df.filter(variable=v, region="World", year=2010)
assert_iamframe_equal(neg_weights_df.aggregate_region(v, weight=w), exp)

msg = (
"Some weights are negative. Data weighted by negative values will be dropped. "
"To use both positive and negative weights, please use the keyword argument "
"`drop_negative_weights=False`."
# negative weights shouldn't be dropped if drop_negative_weights=False
obs_2 = (
IamDataFrame(NEG_WEIGHTS_DF)
.aggregate_region(variable, weight=weight, drop_negative_weights=False)
._data
)
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "WARNING"
exp_2 = np.array([9, 5.25])
np.testing.assert_array_equal(obs_2.values, exp_2)

# test that not dropping negative weights works as expected
exp = simple_df.filter(variable=v, region="World")
exp._data[0] = -8
assert_iamframe_equal(
neg_weights_df.aggregate_region(v, weight=w, drop_negative_weights=False), exp

def test_aggregate_region_with_no_weights_drop_negative_weights_raises(simple_df):
    """Setting `drop_negative_weights=False` without a `weight` raises an error."""
    # the keyword argument is only meaningful in combination with `weight`
    with pytest.raises(ValueError):
        simple_df.aggregate_region("Price|Carbon", drop_negative_weights=False)


def test_aggregate_region_with_weights_raises(simple_df):
def test_aggregate_region_with_weights(simple_df):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"
assert simple_df.check_aggregate_region(v) is not None
assert simple_df.check_aggregate_region(v, weight=w) is None

exp = simple_df.filter(variable=v, region="World")
assert_iamframe_equal(simple_df.aggregate_region(v, weight=w), exp)

# inconsistent index of variable and weight raises an error
_df = simple_df.filter(variable=w, region="reg_b", keep=False)
with pytest.raises(ValueError, match="Inconsistent index between variable and wei"):
_df.aggregate_region(v, weight=w)
pytest.raises(ValueError, _df.aggregate_region, v, weight=w)

# using weight and method other than 'sum' raises an error
pytest.raises(ValueError, simple_df.aggregate_region, v, method="max", weight="bar")

# setting both weight and components raises an error
pytest.raises(
ValueError, simple_df.aggregate_region, v, components=True, weight="bar"
)

# dropping negative weights can only be used with weight
def test_aggregate_region_with_components_and_weights_raises(simple_df):
# setting both weight and components raises an error
pytest.raises(
ValueError, simple_df.aggregate_region, v, drop_negative_weights=False
ValueError,
simple_df.aggregate_region,
"Emissions|CO2",
components=True,
weight="bar",
)


@pytest.mark.parametrize("variable, append", (("Primary Energy", "foo"), (False, True)))
def test_aggregate_region_empty(test_df, variable, append, caplog):
"""Check for performing an "empty" aggregation"""
caplog.set_level(logging.INFO, logger="pyam.aggregation")

if append:
# with `append=True`, the instance is unchanged
Expand All @@ -375,9 +393,10 @@ def test_aggregate_region_empty(test_df, variable, append, caplog):
# with `append=False` (default), an empty instance is returned
assert test_df.aggregate_region(variable).empty

caplog.set_level(logging.INFO, logger="pyam.aggregation")
msg = (
f"Cannot aggregate variable '{variable}' to 'World' "
"because it does not exist in any subregion."
"because it does not exist in any subregion!"
)
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "INFO"
Expand Down
Loading