Feature/improve loss functions #70

Open · wants to merge 54 commits into base: develop

Changes from 4 commits (54 commits total)
e205af4
Dynamic loss function intialisation
HCookie Oct 2, 2024
85905d9
Add more loss functions and include ensemble dim
HCookie Oct 2, 2024
f105df6
Update CHANGELOG
HCookie Oct 2, 2024
dba6c01
Reference PR in CHANGELOG
HCookie Oct 2, 2024
91e3ae3
Address PR Comments
HCookie Oct 3, 2024
23c1670
Allow for lists of losses
HCookie Oct 3, 2024
78d6588
Fix ruff complaining
HCookie Oct 3, 2024
5f25370
refactor: Create WeightedLoss
HCookie Oct 4, 2024
1054f7f
fix: Assert and conversion in loss function get
HCookie Oct 4, 2024
19c1799
Fix names to reference weighted
HCookie Oct 5, 2024
ef49cbb
fix: rework init include loop to raise an error if missing key found
HCookie Oct 5, 2024
aa605a6
fix: refine documentation for feature_weights
HCookie Oct 5, 2024
7c4c8cb
Fix: Remove reduction over ensemble
HCookie Oct 5, 2024
eb7bded
Rename WeightedLoss to BaseWeightedLoss
HCookie Oct 9, 2024
4ad033a
Rename `error` to `x`
HCookie Oct 9, 2024
93d1647
Rework feature_scale use
HCookie Oct 9, 2024
17b0940
Change to registering feature_weights as None
HCookie Oct 9, 2024
e60e5b3
Fix: Use numpy docstring
HCookie Oct 9, 2024
c81d4d5
Fix: Improve assert message
HCookie Oct 9, 2024
6a6bcf1
Refactor: Renames
HCookie Oct 9, 2024
4c28d96
Fix: Remove redundant if
HCookie Oct 11, 2024
b4d0068
Simplify val metrics
HCookie Oct 11, 2024
97b3f7c
Clamp logcosh at 710
HCookie Oct 11, 2024
5befe5f
Fix log
HCookie Oct 11, 2024
27b45f2
Rename loss function configuration
HCookie Oct 14, 2024
3b75f0e
split feature weights from val_metrics
mc4117 Oct 14, 2024
1084c71
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 14, 2024
035483d
Merge pull request #85 from ecmwf/feature/improve_feature_loss
mc4117 Oct 14, 2024
92e7587
Merge branch 'develop' into feature/improve_loss_functions
mc4117 Oct 14, 2024
1c36e0f
Remove mention to optional ensemble
HCookie Oct 15, 2024
5b93dda
Merge branch 'develop' into feature/improve_loss_functions
HCookie Oct 15, 2024
d384f85
Merge remote-tracking branch 'origin/develop' into feature/improve_lo…
HCookie Oct 18, 2024
211879d
Check if weighted loss in callback
HCookie Oct 18, 2024
6cd0e70
Improve docs
HCookie Oct 18, 2024
4b170d0
Rename loss_scaling in losses to variable_scaling
HCookie Oct 18, 2024
df1c92a
Add Huber Loss
HCookie Oct 22, 2024
6dcc9b7
Add CombinedLoss
HCookie Oct 22, 2024
202011f
Merge branch 'develop' into feature/improve_loss_functions
HCookie Oct 22, 2024
ec2c204
Add getattr to CombinedLoss
HCookie Oct 22, 2024
da89d99
Add ScaleTensor (#96)
HCookie Oct 23, 2024
8392873
Update copyright notice
HCookie Oct 23, 2024
a628fae
Merge branch 'develop' into feature/improve_loss_functions
HCookie Oct 23, 2024
e43011d
Fix reference to metric_ranges_validation
HCookie Oct 24, 2024
1406d51
Merge branch 'feature/improve_loss_functions' of github.com:ecmwf/ane…
HCookie Oct 24, 2024
11fe213
Upadate copyright notice
HCookie Oct 24, 2024
7694733
Improve config documentation
HCookie Oct 24, 2024
5c7ed01
Fix initalisation bugs
HCookie Oct 24, 2024
07a2a8b
Add tests to ensure correct initalisation
HCookie Oct 24, 2024
fda2085
Add docs to add_scalar
HCookie Oct 24, 2024
1892ad6
Merge branch 'develop' into feature/improve_loss_functions
HCookie Oct 24, 2024
8a17fb1
Refactor of loss functions
HCookie Oct 25, 2024
2e9f721
Update docs
HCookie Oct 25, 2024
22cd154
Reorder docs
HCookie Oct 25, 2024
6d605ce
Merge branch 'develop' into feature/improve_loss_functions
HCookie Oct 25, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -13,6 +13,7 @@ Keep it human-readable, your future self will thank you!
### Added
- Codeowners file (#56)
- Changelog merge strategy (#56)
- Included more loss functions and allowed configuration [#70](https://github.com/ecmwf/anemoi-training/pull/70)

#### Miscellaneous

32 changes: 28 additions & 4 deletions src/anemoi/training/config/training/default.yaml
@@ -33,10 +33,34 @@ swa:
# use ZeroRedundancyOptimizer ; saves memory for larger models
zero_optimizer: False

# dynamic rescaling of the loss gradient
# see https://arxiv.org/pdf/2306.06079.pdf, section 4.3.2
# don't enable this by default until it's been tested and proven beneficial
loss_gradient_scaling: False
# loss functions
loss_functions:

  # dynamic rescaling of the loss gradient
  # see https://arxiv.org/pdf/2306.06079.pdf, section 4.3.2
  # don't enable this by default until it's been tested and proven beneficial
  loss_gradient_scaling: False

  # loss function for the model
  loss:
    # loss class to initialise, can be anything subclassing torch.nn.Module
    _target_: anemoi.training.losses.mse.WeightedMSELoss
    # what to include in the loss class initialisation
    include_node_weights: True
    include_feature_weights: True
    # other kwargs
    ignore_nans: False

  # loss function for metric calculation
  metrics:
    # loss class to initialise, can be anything subclassing torch.nn.Module
    _target_: anemoi.training.losses.mse.WeightedMSELoss
    # what to include in the loss class initialisation
    include_node_weights: True
    include_feature_weights: False
    # other kwargs
    ignore_nans: True

# length of the "rollout" window (see Keisler's paper)
rollout:
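The `_target_` entries in the config above follow the Hydra-style convention: the dotted path is resolved to a class and the remaining keys become constructor keyword arguments. A minimal, stdlib-only sketch of that mechanism (an illustration of the pattern, not the actual anemoi-training or Hydra code; `collections.Counter` is a stand-in target so the snippet is self-contained):

```python
from importlib import import_module


def instantiate(config: dict, **kwargs):
    """Resolve the dotted path in `_target_` to a class and call it with
    the remaining config entries (plus any extra kwargs) as arguments."""
    config = dict(config)  # don't mutate the caller's config
    module_path, _, class_name = config.pop("_target_").rpartition(".")
    cls = getattr(import_module(module_path), class_name)
    return cls(**config, **kwargs)


# Stand-in target; the real config points at
# anemoi.training.losses.mse.WeightedMSELoss.
counter = instantiate({"_target_": "collections.Counter"}, a=2)
```

In the real training code, objects such as `node_weights` that cannot live in YAML are presumably injected at call time, which would be why the configs above only carry flags like `include_node_weights`.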
109 changes: 109 additions & 0 deletions src/anemoi/training/losses/logcosh.py
@@ -0,0 +1,109 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

from __future__ import annotations

import logging
from functools import cached_property

import torch
from torch import nn

LOGGER = logging.getLogger(__name__)


class WeightedLogCoshLoss(nn.Module):
    """Latitude-weighted LogCosh loss."""

    def __init__(
        self,
        node_weights: torch.Tensor,
        feature_weights: torch.Tensor | None = None,
        ignore_nans: bool = False,
    ) -> None:
        """Latitude- and (inverse-)variance-weighted LogCosh Loss.

        Parameters
        ----------
        node_weights : torch.Tensor of shape (N, )
            Weight of each node in the loss function
        feature_weights : Optional[torch.Tensor], optional
            precomputed, per-variable stepwise variance estimate, by default None
        ignore_nans : bool, optional
            Allow nans in the loss and apply methods ignoring nans for measuring the loss, by default False

        """
        super().__init__()

        self.avg_function = torch.nanmean if ignore_nans else torch.mean
        self.sum_function = torch.nansum if ignore_nans else torch.sum

        self.register_buffer("weights", node_weights, persistent=True)
        if feature_weights is not None:
            self.register_buffer("feature_weights", feature_weights, persistent=True)

    def forward(
        self,
        pred: torch.Tensor,
        target: torch.Tensor,
        squash: bool = True,
        feature_indices: torch.Tensor | None = None,
        feature_scale: bool = True,
    ) -> torch.Tensor:
        """Calculates the lat-weighted LogCosh loss.

        Parameters
        ----------
        pred : torch.Tensor
            Prediction tensor, shape (bs, lat*lon, n_outputs)
        target : torch.Tensor
            Target tensor, shape (bs, lat*lon, n_outputs)
        squash : bool, optional
            Average last dimension, by default True
        feature_indices:
            feature indices (relative to full model output) of the features passed in pred and target
        feature_scale:
            If True, scale the loss by the feature_weights

        Returns
        -------
        torch.Tensor
            Weighted LogCosh loss

        """
        if pred.ndim == 4:
            pred = pred.mean(dim=1)

        out = torch.log(torch.cosh(pred - target))

        # Scale by feature weights if available
        if feature_scale and hasattr(self, "feature_weights"):
            out = (
                out * self.feature_weights
                if feature_indices is None
                else out * self.feature_weights[..., feature_indices]
            )

        # Squash by last dimension
        if squash:
            out = self.avg_function(out, dim=-1)
            # Weight by area
            out *= self.weights.expand_as(out)
            out /= self.sum_function(self.weights.expand_as(out))
            return self.sum_function(out)

        # Weight by area; due to the weighting construction this is analogous to a mean
        out *= self.weights[..., None].expand_as(out)
        # keep last dimension (variables) when summing weights
        out /= self.sum_function(self.weights[..., None].expand_as(out), axis=(0, 1, 2))
        return self.sum_function(out, axis=(0, 1, 2))

    @cached_property
    def name(self) -> str:
        return "logcosh"
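A note on the `Clamp logcosh at 710` commit in the history above: `cosh` overflows double precision once its argument passes roughly 710, even though `log(cosh(x))` itself stays finite (for large `|x|` it approaches `|x| - log 2`). A small pure-Python sketch of the numerically stable identity (illustration only, not the PR's implementation):

```python
import math


def logcosh_stable(x: float) -> float:
    # log(cosh(x)) = |x| + log(1 + exp(-2|x|)) - log(2); never calls
    # cosh directly, so it cannot overflow for large |x|.
    return abs(x) + math.log1p(math.exp(-2.0 * abs(x))) - math.log(2.0)


print(logcosh_stable(1000.0))  # finite, ≈ 999.307
# math.cosh(1000.0) raises OverflowError: cosh exceeds the double range near 710.
```

Clamping the argument at 710, as the commit does, is the cheaper tensor-level guard against the same overflow.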
108 changes: 108 additions & 0 deletions src/anemoi/training/losses/mae.py
@@ -0,0 +1,108 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

from __future__ import annotations

import logging
from functools import cached_property

import torch
from torch import nn

LOGGER = logging.getLogger(__name__)


class WeightedMAELoss(nn.Module):
    """Latitude-weighted MAE loss."""

    def __init__(
        self,
        node_weights: torch.Tensor,
        feature_weights: torch.Tensor | None = None,
        ignore_nans: bool = False,
    ) -> None:
        """Latitude- and (inverse-)variance-weighted MAE Loss.

        Also known as the Weighted L1 loss.

        Parameters
        ----------
        node_weights : torch.Tensor
            Weights by area
        feature_weights : Optional[torch.Tensor], optional
            precomputed, per-variable stepwise variance estimate, by default None
        ignore_nans : bool, optional
            Allow nans in the loss and apply methods ignoring nans for measuring the loss, by default False
        """
        super().__init__()

        self.avg_function = torch.nanmean if ignore_nans else torch.mean
        self.sum_function = torch.nansum if ignore_nans else torch.sum

        self.register_buffer("node_weights", node_weights, persistent=True)
        if feature_weights is not None:
            self.register_buffer("feature_weights", feature_weights, persistent=True)

    def forward(
        self,
        pred: torch.Tensor,
        target: torch.Tensor,
        squash: bool = True,
        feature_indices: torch.Tensor | None = None,
        feature_scale: bool = True,
    ) -> torch.Tensor:
        """Calculates the lat-weighted MAE loss.

        Parameters
        ----------
        pred : torch.Tensor
            Prediction tensor, shape (bs, (optional_ensemble), lat*lon, n_outputs)
        target : torch.Tensor
            Target tensor, shape (bs, (optional_ensemble), lat*lon, n_outputs)
        squash : bool, optional
            Average last dimension, by default True
        feature_indices:
            feature indices (relative to full model output) of the features passed in pred and target
        feature_scale:
            If True, scale the loss by the feature_weights

        Returns
        -------
        torch.Tensor
            Weighted MAE loss
        """
        if pred.ndim == 4:
            pred = pred.mean(dim=1)

        out = torch.abs(pred - target)

        if feature_scale and hasattr(self, "feature_weights"):
            out = (
                out * self.feature_weights
                if feature_indices is None
                else out * self.feature_weights[..., feature_indices]
            )

        # Squash by last dimension
        if squash:
            out = self.avg_function(out, dim=-1)
            # Weight by area
            out *= self.node_weights.expand_as(out)
            out /= self.sum_function(self.node_weights.expand_as(out))
            return self.sum_function(out)

        # Weight by area; due to the weighting construction this is analogous to a mean
        out *= self.node_weights[..., None].expand_as(out)
        # keep last dimension (variables) when summing weights
        out /= self.sum_function(self.node_weights[..., None].expand_as(out), axis=(0, 1, 2))
        return self.sum_function(out, axis=(0, 1, 2))

    @cached_property
    def name(self) -> str:
        return "mae"
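With `squash=True`, the reduction in `forward` is an area-weighted mean of per-node averages: the absolute error is averaged over the variable dimension, scaled by the node weight, and normalised by the total weight. A dependency-free sketch of that arithmetic for a single batch element (a hypothetical helper using plain lists, not the tensor implementation):

```python
def weighted_mae(pred, target, node_weights):
    """Mirror of the squash=True path for one batch element:
    sum_i w_i * mean_vars(|pred_i - target_i|) / sum_i w_i."""
    total, weight_sum = 0.0, 0.0
    for p_node, t_node, w in zip(pred, target, node_weights):
        abs_err = [abs(p - t) for p, t in zip(p_node, t_node)]
        total += w * (sum(abs_err) / len(abs_err))  # average over variables
        weight_sum += w
    return total / weight_sum


# Two nodes, two variables; the second node carries three times the area weight.
loss = weighted_mae([[2.0, 4.0], [1.0, 1.0]], [[0.0, 0.0], [0.0, 0.0]], [1.0, 3.0])
# → (1*3 + 3*1) / (1 + 3) = 1.5
```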
59 changes: 39 additions & 20 deletions src/anemoi/training/losses/mse.py
@@ -10,6 +10,7 @@
from __future__ import annotations

import logging
from functools import cached_property

import torch
from torch import nn
@@ -23,69 +24,87 @@ class WeightedMSELoss(nn.Module):
def __init__(
self,
node_weights: torch.Tensor,
data_variances: torch.Tensor | None = None,
ignore_nans: bool | None = False,
feature_weights: torch.Tensor | None = None,
ignore_nans: bool = False,
) -> None:
"""Latitude- and (inverse-)variance-weighted MSE Loss.

Parameters
----------
node_weights : torch.Tensor of shape (N, )
Weight of each node in the loss function
data_variances : Optional[torch.Tensor], optional
node_weights : torch.Tensor
Weights by area
feature_weights : Optional[torch.Tensor], optional
precomputed, per-variable stepwise variance estimate, by default None
ignore_nans : bool, optional
Allow nans in the loss and apply methods ignoring nans for measuring the loss, by default False

"""
super().__init__()

self.avg_function = torch.nanmean if ignore_nans else torch.mean
self.sum_function = torch.nansum if ignore_nans else torch.sum

self.register_buffer("weights", node_weights, persistent=True)
if data_variances is not None:
self.register_buffer("ivar", data_variances, persistent=True)
self.register_buffer("node_weights", node_weights, persistent=True)
if feature_weights is not None:
self.register_buffer("feature_weights", feature_weights, persistent=True)

def forward(
self,
pred: torch.Tensor,
target: torch.Tensor,
squash: bool = True,
feature_indices: torch.Tensor | None = None,
feature_scale: bool = True,
) -> torch.Tensor:
"""Calculates the lat-weighted MSE loss.

Parameters
----------
pred : torch.Tensor
Prediction tensor, shape (bs, lat*lon, n_outputs)
Prediction tensor, shape (bs, (optional_ensemble), lat*lon, n_outputs)
target : torch.Tensor
Target tensor, shape (bs, lat*lon, n_outputs)
Target tensor, shape (bs, (optional_ensemble), lat*lon, n_outputs)
squash : bool, optional
Average last dimension, by default True
feature_indices:
feature indices (relative to full model output) of the features passed in pred and target
feature_scale:
If True, scale the loss by the feature_weights

Returns
-------
torch.Tensor
Weighted MSE loss

"""
if pred.ndim == 4:
pred = pred.mean(dim=1)


out = torch.square(pred - target)

# Use variances if available
if hasattr(self, "ivar"):
out *= self.ivar
if feature_scale and hasattr(self, "feature_weights"):
out = (
out * self.feature_weights
if feature_indices is None
else out * self.feature_weights[..., feature_indices]
)

# Squash by last dimension
if squash:
out = self.avg_function(out, dim=-1)
# Weight by area
out *= self.weights.expand_as(out)
out /= self.sum_function(self.weights.expand_as(out))
out *= self.node_weights.expand_as(out)
out /= self.sum_function(self.node_weights.expand_as(out))
return self.sum_function(out)

# Weight by area
out *= self.weights[..., None].expand_as(out)
# Weight by area; due to the weighting construction this is analogous to a mean
out *= self.node_weights[..., None].expand_as(out)
# keep last dimension (variables) when summing weights
out /= self.sum_function(self.weights[..., None].expand_as(out), axis=(0, 1, 2))
out /= self.sum_function(self.node_weights[..., None].expand_as(out), axis=(0, 1, 2))
return self.sum_function(out, axis=(0, 1, 2))

@cached_property
def name(self) -> str:
return "mse"
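Across all three losses, the `feature_indices` argument lets a caller evaluate the loss on a subset of the model's output variables while indexing into feature weights defined over the full output — that is what the `self.feature_weights[..., feature_indices]` branch does. A tiny pure-Python sketch of that selection (a hypothetical helper, not the tensor code):

```python
def apply_feature_scale(errors, feature_weights, feature_indices=None):
    """Scale per-variable errors by their feature weights; when
    feature_indices is given, pick those weights out of the full set."""
    if feature_indices is not None:
        feature_weights = [feature_weights[i] for i in feature_indices]
    return [e * w for e, w in zip(errors, feature_weights)]


# Full model has three variables, but this loss only sees variables 0 and 2.
scaled = apply_feature_scale([1.0, 2.0], [0.5, 2.0, 3.0], feature_indices=[0, 2])
# → [0.5, 6.0]
```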