
Commit

Merge remote-tracking branch 'origin/develop' into feature/28-make-models-switchable-through-the-config
JesperDramsch committed Sep 23, 2024
2 parents 32577ff + 90ef59c commit 6afb1cd
Showing 22 changed files with 425 additions and 53 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
CHANGELOG.md merge=union
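
The union merge driver keeps the lines from both sides whenever git hits conflicting hunks in CHANGELOG.md. For an append-only changelog, where concurrent PRs add entries under the same headings, this avoids most of the routine merge conflicts.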
6 changes: 6 additions & 0 deletions .github/CODEOWNERS
@@ -0,0 +1,6 @@
# CODEOWNERS file

# Protect workflow files
/.github/ @theissenhelen @jesperdramsch @gmertes
/.pre-commit-config.yaml @theissenhelen @jesperdramsch @gmertes
/pyproject.toml @theissenhelen @jesperdramsch @gmertes
7 changes: 7 additions & 0 deletions .github/ci-hpc-config.yml
@@ -0,0 +1,7 @@
build:
python: '3.10'
modules:
- ninja
python_dependencies:
- ecmwf/anemoi-utils@develop
parallel: 64
3 changes: 3 additions & 0 deletions .github/workflows/changelog-pr-update.yml
@@ -2,6 +2,9 @@ name: Check Changelog Update on PR
on:
pull_request:
types: [assigned, opened, synchronize, reopened, labeled, unlabeled]
paths-ignore:
- .pre-commit-config.yaml
- .readthedocs.yaml
jobs:
Check-Changelog:
name: Check Changelog Action
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -37,7 +37,7 @@ jobs:
downstream-ci-hpc:
name: downstream-ci-hpc
if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}
uses: ecmwf-actions/downstream-ci/.github/workflows/downstream-ci.yml@main
uses: ecmwf-actions/downstream-ci/.github/workflows/downstream-ci-hpc.yml@main
with:
anemoi-models: ecmwf/anemoi-models@${{ github.event.pull_request.head.sha || github.sha }}
secrets: inherit
31 changes: 23 additions & 8 deletions .pre-commit-config.yaml
@@ -20,6 +20,12 @@ repos:
- id: no-commit-to-branch # Prevent committing to main / master
- id: check-added-large-files # Check for large files added to git
- id: check-merge-conflict # Check for files that contain merge conflict
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0 # Use the ref you want to point at
hooks:
- id: python-use-type-annotations # Check for missing type annotations
- id: python-check-blanket-noqa # Check for # noqa: all
- id: python-no-log-warn # Check for log.warn
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.8.0
hooks:
@@ -34,7 +40,7 @@ repos:
- --force-single-line-imports
- --profile black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.3
rev: v0.6.4
hooks:
- id: ruff
# Next line is for documentation code snippets
@@ -45,7 +51,7 @@
- --exit-non-zero-on-fix
- --preview
- repo: https://github.com/sphinx-contrib/sphinx-lint
rev: v0.9.1
rev: v1.0.0
hooks:
- id: sphinx-lint
# For now, we use it. But it does not support a lot of sphinx features
@@ -59,12 +65,21 @@
hooks:
- id: docconvert
args: ["numpy"]
- repo: https://github.com/b8raoult/optional-dependencies-all
rev: "0.0.6"
hooks:
- id: optional-dependencies-all
args: ["--inplace", "--exclude-keys=dev,docs,tests", "--group=dev=all,docs,tests"]
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "2.2.1"
rev: "2.2.3"
hooks:
- id: pyproject-fmt
- repo: https://github.com/jshwi/docsig # Check docstrings against function sig
rev: v0.60.1
hooks:
- id: docsig
args:
- --ignore-no-params # Allow docstrings without parameters
- --check-dunders # Check dunder methods
- --check-overridden # Check overridden methods
- --check-protected # Check protected methods
- --check-class # Check class docstrings
- --disable=E113 # Disable empty docstrings
- --summary # Print a summary
ci:
autoupdate_schedule: monthly
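
The three pygrep hooks added above are lightweight regex checks rather than full linters. A sketch of the patterns they reject and accept (illustrative only, not code from this repository):

```python
# Illustrative examples for the new pygrep hooks (not repository code).
import logging

log = logging.getLogger(__name__)

# python-no-log-warn: rejects the deprecated logger.warn alias.
# log.warn("disk nearly full")           # flagged
log.warning("disk nearly full")          # accepted

# python-check-blanket-noqa: rejects a bare "# noqa" that silences everything.
# result = undefined_name  # noqa        # flagged
# result = undefined_name  # noqa: F821  # accepted: names the suppressed error

# python-use-type-annotations: rejects type comments in favour of annotations.
# counts = {}  # type: dict[str, int]    # flagged
counts: dict[str, int] = {}              # accepted
```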
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -10,6 +10,19 @@ Keep it human-readable, your future self will thank you!

## [Unreleased](https://github.com/ecmwf/anemoi-models/compare/0.3.0...HEAD)

### Added
- Codeowners file
- Pygrep precommit hooks
- Docsig precommit hooks
- Changelog merge strategy
- Configurability of the dropout probability in the MultiHeadSelfAttention module
- Variable Bounding as configurable model layers [#13](https://github.com/ecmwf/anemoi-models/issues/13)

### Changed
- Bugfixes for CI

### Removed

## [0.3.0](https://github.com/ecmwf/anemoi-models/compare/0.2.1...0.3.0) - Remapping of (meteorological) Variables

### Added
34 changes: 6 additions & 28 deletions pyproject.toml
@@ -11,20 +11,13 @@
[build-system]
build-backend = "setuptools.build_meta"

requires = [
"setuptools>=61",
"setuptools-scm>=8",
]
requires = [ "setuptools>=61", "setuptools-scm>=8" ]

[project]
name = "anemoi-models"
description = "A package to hold various functions to support training of ML models."
readme = "README.md"
keywords = [
"ai",
"models",
"tools",
]
keywords = [ "ai", "models", "tools" ]

license = { file = "LICENSE" }
authors = [
@@ -47,29 +40,17 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]

dynamic = [
"version",
]
dynamic = [ "version" ]
dependencies = [
"anemoi-utils>=0.1.9",
"einops>=0.6.1",
"hydra-core>=1.3",
"torch>=2.2",
"torch-geometric>=2.3,<2.5",
]
optional-dependencies.all = [
]
optional-dependencies.all = [ ]

optional-dependencies.dev = [
"hypothesis",
"nbsphinx",
"pandoc",
"pytest",
"rstfmt",
"sphinx",
"sphinx-argparse<0.5",
"sphinx-rtd-theme",
]
optional-dependencies.dev = [ "anemoi-models[all,docs,tests]" ]

optional-dependencies.docs = [
"nbsphinx",
@@ -80,10 +61,7 @@ optional-dependencies.docs = [
"sphinx-rtd-theme",
]

optional-dependencies.tests = [
"hypothesis",
"pytest",
]
optional-dependencies.tests = [ "hypothesis", "pytest" ]

urls.Documentation = "https://anemoi-models.readthedocs.io/"
urls.Homepage = "https://github.com/ecmwf/anemoi-models/"
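
The restructured extras are self-referential: `optional-dependencies.dev` now just pulls in `anemoi-models[all,docs,tests]`, so `pip install "anemoi-models[dev]"` resolves to the package plus the other extras without duplicating the dependency lists. The `optional-dependencies-all` hook added to the pre-commit config keeps this arrangement in sync with the declared extras.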
15 changes: 11 additions & 4 deletions src/anemoi/models/layers/attention.py
@@ -40,19 +40,19 @@ def __init__(
bias: bool = False,
is_causal: bool = False,
window_size: Optional[int] = None,
dropout: float = 0.0,
dropout_p: float = 0.0,
):
super().__init__()

assert (
embed_dim % num_heads == 0
), f"Embedding dimension ({embed_dim}) must be divisible by number of heads ({num_heads})"

self.dropout = dropout
self.num_heads = num_heads
self.embed_dim = embed_dim
self.head_dim = embed_dim // num_heads # q k v
self.window_size = (window_size, window_size) # flash attention
self.dropout_p = dropout_p
self.is_causal = is_causal

self.lin_qkv = nn.Linear(embed_dim, 3 * embed_dim, bias=bias)
@@ -86,15 +86,22 @@ def forward(
query = shard_heads(query, shapes=shapes, mgroup=model_comm_group)
key = shard_heads(key, shapes=shapes, mgroup=model_comm_group)
value = shard_heads(value, shapes=shapes, mgroup=model_comm_group)
dropout_p = self.dropout_p if self.training else 0.0

if _FLASH_ATTENTION_AVAILABLE:
query, key, value = (
einops.rearrange(t, "batch heads grid vars -> batch grid heads vars") for t in (query, key, value)
)
out = self.attention(query, key, value, causal=False, window_size=self.window_size)
out = self.attention(query, key, value, causal=False, window_size=self.window_size, dropout_p=dropout_p)
out = einops.rearrange(out, "batch grid heads vars -> batch heads grid vars")
else:
out = self.attention(query, key, value, is_causal=False) # expects (batch heads grid variable) format
out = self.attention(
query,
key,
value,
is_causal=False,
dropout_p=dropout_p,
) # expects (batch heads grid variable) format

out = shard_sequence(out, shapes=shapes, mgroup=model_comm_group)
out = einops.rearrange(out, "batch heads grid vars -> (batch grid) (heads vars)")
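
The gating seen above (`dropout_p = self.dropout_p if self.training else 0.0`) exists because both flash attention and torch's `scaled_dot_product_attention` apply dropout whenever `dropout_p > 0`, with no awareness of the module's train/eval mode. A minimal, self-contained sketch of the same pattern, using a hypothetical toy module rather than the repository's class:

```python
# Minimal sketch of the dropout gating pattern (hypothetical toy module).
import torch
from torch import nn
from torch.nn.functional import scaled_dot_product_attention


class TinySelfAttention(nn.Module):
    def __init__(self, embed_dim: int, num_heads: int, dropout_p: float = 0.0) -> None:
        super().__init__()
        assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
        self.num_heads = num_heads
        self.dropout_p = dropout_p  # stored once, applied conditionally in forward
        self.lin_qkv = nn.Linear(embed_dim, 3 * embed_dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        batch, seq, dim = x.shape
        qkv = self.lin_qkv(x).reshape(batch, seq, 3, self.num_heads, -1)
        query, key, value = qkv.permute(2, 0, 3, 1, 4)  # each: batch heads seq head_dim
        # Attention dropout is active only during training, as in the diff above.
        dropout_p = self.dropout_p if self.training else 0.0
        out = scaled_dot_product_attention(query, key, value, dropout_p=dropout_p)
        return out.transpose(1, 2).reshape(batch, seq, dim)


attn = TinySelfAttention(embed_dim=64, num_heads=4, dropout_p=0.1)
attn.eval()  # in eval mode the effective dropout probability is 0.0
_ = attn(torch.randn(2, 8, 64))
```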
12 changes: 10 additions & 2 deletions src/anemoi/models/layers/block.py
@@ -55,7 +55,15 @@ def forward(
class TransformerProcessorBlock(BaseBlock):
"""Transformer block with MultiHeadSelfAttention and MLPs."""

def __init__(self, num_channels, hidden_dim, num_heads, activation, window_size: int):
def __init__(
self,
num_channels: int,
hidden_dim: int,
num_heads: int,
activation: str,
window_size: int,
dropout_p: float = 0.0,
):
super().__init__()

try:
@@ -72,7 +80,7 @@ def __init__(self, num_channels, hidden_dim, num_heads, activation, window_size:
window_size=window_size,
bias=False,
is_causal=False,
dropout=0.0,
dropout_p=dropout_p,
)

self.mlp = nn.Sequential(
115 changes: 115 additions & 0 deletions src/anemoi/models/layers/bounding.py
@@ -0,0 +1,115 @@
from __future__ import annotations

from abc import ABC
from abc import abstractmethod

import torch
from torch import nn

from anemoi.models.data_indices.tensor import InputTensorIndex


class BaseBounding(nn.Module, ABC):
"""Abstract base class for bounding strategies.
This class defines an interface for bounding strategies which are used to apply a specific
restriction to the predictions of a model.
"""

def __init__(
self,
*,
variables: list[str],
name_to_index: dict,
) -> None:
super().__init__()

self.name_to_index = name_to_index
self.variables = variables
self.data_index = self._create_index(variables=self.variables)

def _create_index(self, variables: list[str]) -> InputTensorIndex:
return InputTensorIndex(includes=variables, excludes=[], name_to_index=self.name_to_index)._only

@abstractmethod
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Applies the bounding to the predictions.
Parameters
----------
x : torch.Tensor
The tensor containing the predictions that will be bounded.
Returns
-------
torch.Tensor
A tensor with the bounding applied.
"""
pass


class ReluBounding(BaseBounding):
"""Initializes the bounding with a ReLU activation / zero clamping."""

def forward(self, x: torch.Tensor) -> torch.Tensor:
x[..., self.data_index] = torch.nn.functional.relu(x[..., self.data_index])
return x


class HardtanhBounding(BaseBounding):
"""Initializes the bounding with specified minimum and maximum values for bounding.
Parameters
----------
variables : list[str]
A list of strings representing the variables that will be bounded.
name_to_index : dict
A dictionary mapping the variable names to their corresponding indices.
min_val : float
The minimum value for the HardTanh activation.
max_val : float
The maximum value for the HardTanh activation.
"""

def __init__(self, *, variables: list[str], name_to_index: dict, min_val: float, max_val: float) -> None:
super().__init__(variables=variables, name_to_index=name_to_index)
self.min_val = min_val
self.max_val = max_val

def forward(self, x: torch.Tensor) -> torch.Tensor:
x[..., self.data_index] = torch.nn.functional.hardtanh(
x[..., self.data_index], min_val=self.min_val, max_val=self.max_val
)
return x


class FractionBounding(HardtanhBounding):
"""Initializes the FractionBounding with specified parameters.
Parameters
----------
variables : list[str]
A list of strings representing the variables that will be bounded.
name_to_index : dict
A dictionary mapping the variable names to their corresponding indices.
min_val : float
The minimum value for the HardTanh activation.
max_val : float
The maximum value for the HardTanh activation.
total_var : str
A string representing a variable from which a secondary variable is derived. For
example, in the case of convective precipitation (Cp), total_var = Tp (total precipitation).
"""

def __init__(
self, *, variables: list[str], name_to_index: dict, min_val: float, max_val: float, total_var: str
) -> None:
super().__init__(variables=variables, name_to_index=name_to_index, min_val=min_val, max_val=max_val)
self.total_variable = self._create_index(variables=[total_var])

def forward(self, x: torch.Tensor) -> torch.Tensor:
# Apply the HardTanh bounding to the data_index variables
x = super().forward(x)
# Calculate the fraction of the total variable
x[..., self.data_index] *= x[..., self.total_variable]
return x
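
A hypothetical usage sketch for the new bounding layers (variable names and shapes are invented; assumes a version of anemoi-models containing this file is installed so the imports resolve):

```python
# Hypothetical usage of the new bounding layers; variable names are invented.
import torch

from anemoi.models.layers.bounding import FractionBounding
from anemoi.models.layers.bounding import ReluBounding

name_to_index = {"tp": 0, "cp": 1, "q": 2}  # variable name -> channel index

# Clamp specific humidity at zero.
relu_bound = ReluBounding(variables=["q"], name_to_index=name_to_index)

# Constrain convective precipitation to a fraction of total precipitation.
frac_bound = FractionBounding(
    variables=["cp"],
    name_to_index=name_to_index,
    min_val=0.0,
    max_val=1.0,
    total_var="tp",
)

x = torch.randn(4, 10, 3)  # (batch, grid, variables)
x = relu_bound(x)  # q >= 0
x = frac_bound(x)  # cp is clamped to [0, 1], then multiplied by tp
```

Because FractionBounding rescales the clamped fraction by the total variable, the result satisfies 0 <= cp <= tp wherever tp is non-negative.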
4 changes: 4 additions & 0 deletions src/anemoi/models/layers/chunk.py
@@ -73,6 +73,7 @@ def __init__(
num_heads: int = 16,
mlp_hidden_ratio: int = 4,
activation: str = "GELU",
dropout_p: float = 0.0,
) -> None:
"""Initialize TransformerProcessor.
@@ -88,6 +89,8 @@
ratio of mlp hidden dimension to embedding dimension, default 4
activation : str, optional
Activation function, by default "GELU"
dropout_p: float
Dropout probability used for multi-head self attention, default 0.0
"""
super().__init__(num_channels=num_channels, num_layers=num_layers)

@@ -98,6 +101,7 @@ def __init__(
num_heads=num_heads,
activation=activation,
window_size=window_size,
dropout_p=dropout_p,
)

def forward(
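
Taken together, the attention, block, and chunk changes thread `dropout_p` from the processor configuration down through `TransformerProcessorChunk` and `TransformerProcessorBlock` into `MultiHeadSelfAttention`, which is what makes the attention dropout probability configurable from the model config (the CHANGELOG entry above).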
