Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Refactor tests for region-aggregation with weights" #788

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions pyam/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _aggregate(df, variable, components=None, method="sum"):
)

mapping = {}
msg = "Cannot aggregate variable '{}' because it has no components."
msg = "Cannot aggregate variable '{}' because it has no components!"
# if single variable
if is_str(variable):
# default components to all variables one level below `variable`
Expand Down Expand Up @@ -109,19 +109,19 @@ def _aggregate_region(
"""Internal implementation for aggregating data over subregions"""
if not is_str(variable) and components is not False:
raise ValueError(
"Aggregating by list of variables with components is not supported."
"Aggregating by list of variables with components is not supported!"
)

if weight is not None and components is not False:
raise ValueError("Using weights and components in one operation not supported.")
raise ValueError("Using weights and components in one operation not supported!")

# default subregions to all regions other than `region`
subregions = subregions or df._all_other_regions(region, variable)

if not len(subregions):
logger.info(
f"Cannot aggregate variable '{variable}' to '{region}' "
"because it does not exist in any subregion."
"because it does not exist in any subregion!"
)
return

Expand All @@ -131,7 +131,7 @@ def _aggregate_region(
if weight is None:
if drop_negative_weights is False:
raise ValueError(
"Dropping negative weights can only be used with `weights`."
"Dropping negative weights can only be used with `weights`!"
)

_data = _group_and_agg(subregion_df._data[rows], "region", method=method)
Expand Down Expand Up @@ -208,7 +208,7 @@ def _agg_weight(data, weight, method, drop_negative_weights):

# only summation allowed with weights
if method not in ["sum", np.sum]:
raise ValueError("Only method 'np.sum' allowed for weighted average.")
raise ValueError("Only method 'np.sum' allowed for weighted average!")

weight = weight.droplevel(["variable", "unit"])

Expand All @@ -218,11 +218,12 @@ def _agg_weight(data, weight, method, drop_negative_weights):
if drop_negative_weights is True:
if any(weight < 0):
logger.warning(
"Some weights are negative. Data weighted by negative values will be "
"dropped. To use both positive and negative weights, "
"Some of the weights are negative. "
"All data weighted by negative values will be dropped. "
"To apply both positive and negative weights to the data, "
"please use the keyword argument `drop_negative_weights=False`."
)
# drop negative weights
# Drop negative weights
weight[weight < 0] = None

col1 = data.index.names.difference(["region"])
Expand All @@ -241,4 +242,4 @@ def _get_method_func(method):
return KNOWN_FUNCS[method]

# raise error if `method` is a string but not in dict of known methods
raise ValueError(f"Unknown method: {method}")
raise ValueError(f"'{method}' is not a known method!")
10 changes: 5 additions & 5 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,7 @@ def aggregate_region(
"""Aggregate timeseries data by subregions.

This function allows to add variable sub-categories that are only
defined at the `region` level by setting `components=True`.
defined at the `region` level by setting `components=True`

Parameters
----------
Expand Down Expand Up @@ -1589,7 +1589,7 @@ def check_aggregate_region(
# filter and groupby data, use `pd.Series.align` for matching index
rows = self._apply_filters(region=region, variable=variable)
if not rows.any():
logger.info(f"Variable '{variable}' does not exist in region '{region}'.")
logger.info(f"Variable '{variable}' does not exist in region '{region}'!")
return

df_region, df_subregions = _group_and_agg(self._data[rows], "region").align(
Expand Down Expand Up @@ -1630,7 +1630,7 @@ def aggregate_time(
method="sum",
append=False,
):
"""Aggregate timeseries data by subannual time resolution.
"""Aggregate timeseries data by subannual time resolution

Parameters
----------
Expand Down Expand Up @@ -1671,7 +1671,7 @@ def downscale_region(
weight=None,
append=False,
):
"""Downscale timeseries data to a number of subregions.
"""Downscale timeseries data to a number of subregions

Parameters
----------
Expand Down Expand Up @@ -1742,7 +1742,7 @@ def _get_cols(self, cols):
return META_IDX + cols + self.extra_cols

def check_internal_consistency(self, components=False, **kwargs):
"""Check whether a scenario ensemble is internally consistent.
"""Check whether a scenario ensemble is internally consistent

We check that all variables are equal to the sum of their sectoral
components and that all the regions add up to the World total. If
Expand Down
103 changes: 61 additions & 42 deletions tests/test_feature_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,20 @@
columns=LONG_IDX + ["value"],
)

# Test fixture containing one negative weight value (Emissions|CO2 in
# reg_a, 2005 = -4.0) so weighted region-aggregation of Price|Carbon can
# exercise the drop-negative-weights behavior.
_NEG_WEIGHT_ROWS = [
    ("model_a", "scen_a", "reg_a", "Emissions|CO2", "EJ/yr", 2005, -4.0),
    ("model_a", "scen_a", "reg_a", "Emissions|CO2", "EJ/yr", 2010, 5.0),
    ("model_a", "scen_a", "reg_b", "Emissions|CO2", "EJ/yr", 2005, 2.0),
    ("model_a", "scen_a", "reg_b", "Emissions|CO2", "EJ/yr", 2010, 3.0),
    ("model_a", "scen_a", "reg_a", "Price|Carbon", "USD/tCO2", 2005, 6.0),
    ("model_a", "scen_a", "reg_a", "Price|Carbon", "USD/tCO2", 2010, 6.0),
    ("model_a", "scen_a", "reg_b", "Price|Carbon", "USD/tCO2", 2005, 3.0),
    ("model_a", "scen_a", "reg_b", "Price|Carbon", "USD/tCO2", 2010, 4.0),
]
NEG_WEIGHTS_DF = pd.DataFrame(
    [list(row) for row in _NEG_WEIGHT_ROWS], columns=LONG_IDX + ["value"]
)


@pytest.mark.parametrize(
"variable,data",
Expand Down Expand Up @@ -98,7 +112,8 @@ def test_check_aggregate_top_level(simple_df):


@pytest.mark.parametrize(
"variable", ("Primary Energy", (["Primary Energy", "Emissions|CO2"]))
"variable",
(("Primary Energy"), (["Primary Energy", "Emissions|CO2"])),
)
def test_aggregate_append(simple_df, variable):
# remove `variable`, do aggregate and append, check equality to original
Expand Down Expand Up @@ -167,6 +182,7 @@ def test_aggregate_skip_intermediate(recursive_df):
)
def test_aggregate_empty(test_df, variable, append, caplog):
"""Check for performing an "empty" aggregation"""
caplog.set_level(logging.INFO, logger="pyam.aggregation")

if append:
# with `append=True`, the instance is unchanged
Expand All @@ -177,7 +193,7 @@ def test_aggregate_empty(test_df, variable, append, caplog):
# with `append=False` (default), an empty instance is returned
assert test_df.aggregate(variable).empty

msg = f"Cannot aggregate variable '{variable}' because it has no components."
msg = f"Cannot aggregate variable '{variable}' because it has no components!"
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "INFO"

Expand All @@ -196,7 +212,7 @@ def test_aggregate_components_as_dict(simple_df):
@pytest.mark.parametrize(
"variable",
(
"Primary Energy",
("Primary Energy"),
(["Primary Energy", "Primary Energy|Coal", "Primary Energy|Wind"]),
),
)
Expand Down Expand Up @@ -225,12 +241,13 @@ def test_check_aggregate_region(simple_df):

def test_check_aggregate_region_log(simple_df, caplog):
# verify that `check_aggregate_region()` writes log on empty assertion
caplog.set_level(logging.INFO, logger="pyam.core")
(
simple_df.filter(
variable="Primary Energy", region="World", keep=False
).check_aggregate_region("Primary Energy")
)
msg = "Variable 'Primary Energy' does not exist in region 'World'."
msg = "Variable 'Primary Energy' does not exist in region 'World'!"
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "INFO"

Expand All @@ -243,7 +260,7 @@ def test_check_aggregate_region_log(simple_df, caplog):
),
)
def test_aggregate_region_append(simple_df, variable):
# remove `variable`, aggregate and append, check equality to original
# remove `variable`, do aggregate and append, check equality to original
_df = simple_df.filter(variable=variable, region="World", keep=False)
_df.aggregate_region(variable, append=True)
assert_iamframe_equal(_df, simple_df)
Expand Down Expand Up @@ -304,66 +321,67 @@ def test_aggregate_region_with_components(simple_df):
assert _df.check_aggregate_region(v, components=["foo"]) is None


def test_aggregate_region_with_weights(simple_df, caplog):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"
assert simple_df.check_aggregate_region(v) is not None
assert simple_df.check_aggregate_region(v, weight=w) is None
def test_agg_weight():
variable = "Price|Carbon"
weight = "Emissions|CO2"
# negative weights should be dropped on default
obs_1 = IamDataFrame(NEG_WEIGHTS_DF).aggregate_region(variable, weight=weight)._data
exp_1 = np.array([5.25])
np.testing.assert_array_equal(obs_1.values, exp_1)

# test the full dataset
exp = simple_df.filter(variable=v, region="World")
assert_iamframe_equal(simple_df.aggregate_region(v, weight=w), exp)

# test that dropping negative weights works as expected
neg_weights_df = simple_df.copy()
neg_weights_df._data[18] = -6
exp = simple_df.filter(variable=v, region="World", year=2010)
assert_iamframe_equal(neg_weights_df.aggregate_region(v, weight=w), exp)

msg = (
"Some weights are negative. Data weighted by negative values will be dropped. "
"To use both positive and negative weights, please use the keyword argument "
"`drop_negative_weights=False`."
# negative weights shouldn't be dropped if drop_negative_weights=False
obs_2 = (
IamDataFrame(NEG_WEIGHTS_DF)
.aggregate_region(variable, weight=weight, drop_negative_weights=False)
._data
)
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "WARNING"
exp_2 = np.array([9, 5.25])
np.testing.assert_array_equal(obs_2.values, exp_2)

# test that not dropping negative weights works as expected
exp = simple_df.filter(variable=v, region="World")
exp._data[0] = -8
assert_iamframe_equal(
neg_weights_df.aggregate_region(v, weight=w, drop_negative_weights=False), exp

def test_aggregate_region_with_no_weights_drop_negative_weights_raises(simple_df):
    """Setting `drop_negative_weights=False` without a `weight` raises an error."""
    # the keyword argument is only meaningful in combination with `weight`
    with pytest.raises(ValueError):
        simple_df.aggregate_region("Price|Carbon", drop_negative_weights=False)


def test_aggregate_region_with_weights_raises(simple_df):
def test_aggregate_region_with_weights(simple_df):
# carbon price shouldn't be summed but be weighted by emissions
v = "Price|Carbon"
w = "Emissions|CO2"
assert simple_df.check_aggregate_region(v) is not None
assert simple_df.check_aggregate_region(v, weight=w) is None

exp = simple_df.filter(variable=v, region="World")
assert_iamframe_equal(simple_df.aggregate_region(v, weight=w), exp)

# inconsistent index of variable and weight raises an error
_df = simple_df.filter(variable=w, region="reg_b", keep=False)
with pytest.raises(ValueError, match="Inconsistent index between variable and wei"):
_df.aggregate_region(v, weight=w)
pytest.raises(ValueError, _df.aggregate_region, v, weight=w)

# using weight and method other than 'sum' raises an error
pytest.raises(ValueError, simple_df.aggregate_region, v, method="max", weight="bar")

# setting both weight and components raises an error
pytest.raises(
ValueError, simple_df.aggregate_region, v, components=True, weight="bar"
)

# dropping negative weights can only be used with weight
def test_aggregate_region_with_components_and_weights_raises(simple_df):
# setting both weight and components raises an error
pytest.raises(
ValueError, simple_df.aggregate_region, v, drop_negative_weights=False
ValueError,
simple_df.aggregate_region,
"Emissions|CO2",
components=True,
weight="bar",
)


@pytest.mark.parametrize("variable, append", (("Primary Energy", "foo"), (False, True)))
def test_aggregate_region_empty(test_df, variable, append, caplog):
"""Check for performing an "empty" aggregation"""
caplog.set_level(logging.INFO, logger="pyam.aggregation")

if append:
# with `append=True`, the instance is unchanged
Expand All @@ -375,9 +393,10 @@ def test_aggregate_region_empty(test_df, variable, append, caplog):
# with `append=False` (default), an empty instance is returned
assert test_df.aggregate_region(variable).empty

caplog.set_level(logging.INFO, logger="pyam.aggregation")
msg = (
f"Cannot aggregate variable '{variable}' to 'World' "
"because it does not exist in any subregion."
"because it does not exist in any subregion!"
)
idx = caplog.messages.index(msg)
assert caplog.records[idx].levelname == "INFO"
Expand Down
Loading