diff --git a/benchmarks/.gitattributes b/benchmarks/.gitattributes
new file mode 100644
index 00000000..24a8e879
--- /dev/null
+++ b/benchmarks/.gitattributes
@@ -0,0 +1 @@
+*.png filter=lfs diff=lfs merge=lfs -text
diff --git a/benchmarks/run_flame.py b/benchmarks/flame/run_flame.py
similarity index 100%
rename from benchmarks/run_flame.py
rename to benchmarks/flame/run_flame.py
diff --git a/docs/benchmarks/SineRegular.svg b/benchmarks/html/SineRegular.svg
similarity index 100%
rename from docs/benchmarks/SineRegular.svg
rename to benchmarks/html/SineRegular.svg
diff --git a/docs/benchmarks/SineUniform.svg b/benchmarks/html/SineUniform.svg
similarity index 100%
rename from docs/benchmarks/SineUniform.svg
rename to benchmarks/html/SineUniform.svg
diff --git a/benchmarks/html/img/ns_test_144.png b/benchmarks/html/img/ns_test_144.png
new file mode 100644
index 00000000..510e3e5c
--- /dev/null
+++ b/benchmarks/html/img/ns_test_144.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6848d28e0df3639ff77ce3b194d899082121e925354a0290f9b98a9ab1421af
+size 638981
diff --git a/benchmarks/html/img/ns_test_179.png b/benchmarks/html/img/ns_test_179.png
new file mode 100644
index 00000000..65f9bec3
--- /dev/null
+++ b/benchmarks/html/img/ns_test_179.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7f4368adecc8d13c9350b2cd8c0528f13da1f62c4035ad145171363e5e8e7c0
+size 602590
diff --git a/benchmarks/html/img/ns_train_237.png b/benchmarks/html/img/ns_train_237.png
new file mode 100644
index 00000000..9b283d75
--- /dev/null
+++ b/benchmarks/html/img/ns_train_237.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:257a66d3c097f067981e9028243a8547e7c3406c3dc1aaebaa462937d076a72a
+size 652635
diff --git a/benchmarks/html/img/ns_train_420.png b/benchmarks/html/img/ns_train_420.png
new file mode 100644
index 00000000..c441d0db
--- /dev/null
+++ b/benchmarks/html/img/ns_train_420.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f1d95d5a808da6221907e65a84242b19d024e7b620fb26fd25c4a5998635c24
+size 618938
diff --git a/benchmarks/html/navierstokes.png b/benchmarks/html/navierstokes.png
new file mode 100644
index 00000000..0d3b1b82
--- /dev/null
+++ b/benchmarks/html/navierstokes.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:419805b708da762b4a0c6271ae58794e3f999f49b31cbc785ccb5ac419b34527
+size 4442719
diff --git a/benchmarks/html/style.css b/benchmarks/html/style.css
index 4eb4523a..95142db6 100644
--- a/benchmarks/html/style.css
+++ b/benchmarks/html/style.css
@@ -28,7 +28,13 @@
     line-height: 1.2;
 }
 
-.benchmark-table td, table th {
+.benchmark-table td {
+    text-align: left;
+    vertical-align: middle;
+    min-width: 90px;
+}
+
+.benchmark-table th {
     text-align: left;
     vertical-align: middle;
 }
diff --git a/benchmarks/navierstokes/plot_navierstokes.py b/benchmarks/navierstokes/plot_navierstokes.py
new file mode 100644
index 00000000..5f299f77
--- /dev/null
+++ b/benchmarks/navierstokes/plot_navierstokes.py
@@ -0,0 +1,89 @@
+import torch
+import matplotlib.pyplot as plt
+from continuity.benchmarks import NavierStokes
+from continuity.operators import FourierNeuralOperator
+
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ns = NavierStokes()
+
+operator = FourierNeuralOperator(
+    ns.train_dataset.shapes,
+    grid_shape=(64, 64, 10),
+    width=32,
+    depth=4,
+    device=device,
+)
+
+operator.load(
+    "mlruns/271016623891034109/8755b17d3af9494db843e3a8d0c42ad6/artifacts/final.pt"
+)
+operator.eval()
+
+# Compute train loss
+loss_fn = ns.losses[0]
+
+
+@torch.no_grad()
+def compute_loss(dataset):
+    """Evaluate `loss_fn` on every sample of `dataset`, one sample at a time.
+
+    Runs without autograd so no graph is kept alive between samples.
+    Returns `(avg, max, argmax, min, argmin)` of the per-sample losses; the
+    loss values are plain floats and the indices are positions in the loader.
+    """
+    loader = torch.utils.data.DataLoader(dataset, batch_size=1)
+    losses = []
+    for xuyv in loader:
+        x, u, y, v = [t.to(device) for t in xuyv]
+        losses.append(loss_fn(operator, x, u, y, v).item())
+    max_i = max(range(len(losses)), key=losses.__getitem__)
+    min_i = min(range(len(losses)), key=losses.__getitem__)
+    avg_loss = sum(losses) / len(losses)
+    return avg_loss, losses[max_i], max_i, losses[min_i], min_i
+
+
+loss_train, max_loss, max_i_train, min_loss, min_i_train = compute_loss(
+    ns.train_dataset
+)
+print(f"rel. error train = {loss_train:.4e}")
+print(f"min loss = {min_loss:.4e} at index {min_i_train}")
+print(f"max loss = {max_loss:.4e} at index {max_i_train}")
+
+# Compute test loss
+loss_test, max_loss, max_i_test, min_loss, min_i_test = compute_loss(ns.test_dataset)
+print(f"rel. error test =  {loss_test:.4e}")
+print(f"min loss = {min_loss:.4e} at index {min_i_test}")
+print(f"max loss = {max_loss:.4e} at index {max_i_test}")
+
+
+# Plot
+def plot_sample(split, sample):
+    """Plot truth, prediction and error of one sample and save it as PNG.
+
+    `split` selects the train ("train") or test (anything else) dataset and
+    `sample` is the index within it; one row is drawn per slice of axis 3.
+    """
+    dataset = ns.train_dataset if split == "train" else ns.test_dataset
+    x, u, y, v = [t.to(device) for t in dataset[sample : sample + 1]]
+    with torch.no_grad():  # inference only, no autograd graph needed
+        v_pred = operator(x, u, y).reshape(1, 64, 64, 10, 1).cpu()
+    v = v.reshape(1, 64, 64, 10, 1).cpu()
+
+    fig, axs = plt.subplots(10, 3, figsize=(4, 16))
+    for col, title in enumerate(("Truth", "Prediction", "Error")):
+        axs[0][col].set_title(title)
+    for t in range(10):
+        for col, field in enumerate((v, v_pred, v - v_pred)):
+            im = axs[t][col].imshow(field[0, :, :, t, 0], cmap="jet")
+            axs[t][col].axis("off")
+        fig.colorbar(im, ax=axs[t][2])  # colorbar for the error panel only
+    fig.tight_layout()
+    fig.savefig(f"navierstokes/ns_{split}_{sample}.png", dpi=500)
+    plt.close(fig)  # release the figure; this function is called repeatedly
+
+
+plot_sample("train", min_i_train)
+plot_sample("train", max_i_train)
+plot_sample("test", min_i_test)
+plot_sample("test", max_i_test)
diff --git a/benchmarks/navierstokes/results.txt b/benchmarks/navierstokes/results.txt
new file mode 100644
index 00000000..41612c96
--- /dev/null
+++ b/benchmarks/navierstokes/results.txt
@@ -0,0 +1,16 @@
+rel. error train = 1.8508e-02
+min loss = 8.8748e-03 at index 237
+max loss = 3.1433e-02 at index 420
+rel. error test =  1.8408e-01
+min loss = 1.0220e-01 at index 144
+max loss = 4.4655e-01 at index 179
+
+--
+
+Reference: _Li, Zongyi, et al. "Fourier neural operator for parametric partial
+differential equations." arXiv preprint arXiv:2010.08895 (2020)_
+
+Table 1: nu=1e−5  T=20  N=1000
+FNO-3D: 0.1893
+
+(Ours: 1.8408e-01)
diff --git a/benchmarks/navierstokes/run_navierstokes.py b/benchmarks/navierstokes/run_navierstokes.py
new file mode 100644
index 00000000..5ba6a02f
--- /dev/null
+++ b/benchmarks/navierstokes/run_navierstokes.py
@@ -0,0 +1,20 @@
+from functools import partial
+from continuity.benchmarks.run import BenchmarkRunner, RunConfig
+from continuity.benchmarks import NavierStokes
+from continuity.operators import FourierNeuralOperator
+
+config = RunConfig(
+    benchmark_factory=NavierStokes,
+    operator_factory=partial(
+        FourierNeuralOperator,
+        grid_shape=(64, 64, 10),
+        width=32,
+        depth=4,
+    ),
+    lr=1e-3,
+    max_epochs=100,
+    batch_size=10,
+)
+
+if __name__ == "__main__":
+    BenchmarkRunner.run(config)
diff --git a/build_scripts/copy_benchmarks.py b/build_scripts/copy_benchmarks.py
index 24d2d2fc..c776881d 100644
--- a/build_scripts/copy_benchmarks.py
+++ b/build_scripts/copy_benchmarks.py
@@ -44,3 +44,5 @@ def on_shutdown():
         file.unlink()
     for file in docs_benchmarks_dir.glob("*.png"):
         file.unlink()
+    for file in docs_benchmarks_dir.glob("*.svg"):
+        file.unlink()
diff --git a/data/.dvc/.gitignore b/data/.dvc/.gitignore
new file mode 100644
index 00000000..528f30c7
--- /dev/null
+++ b/data/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/data/.dvc/config b/data/.dvc/config
new file mode 100644
index 00000000..90e4cdd2
--- /dev/null
+++ b/data/.dvc/config
@@ -0,0 +1,4 @@
+[core]
+    remote = gdrive
+['remote "gdrive"']
+    url = gdrive://1Mts9tmPjKzqw-as_j1XTRwa9ulNssy5a
diff --git a/data/.dvcignore b/data/.dvcignore
new file mode 100644
index 00000000..51973055
--- /dev/null
+++ b/data/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/data/.gitignore b/data/.gitignore
new file mode 100644
index 00000000..391b9bd7
--- /dev/null
+++ b/data/.gitignore
@@ -0,0 +1,2 @@
+/flame
+/navierstokes
diff --git a/data/README.md b/data/README.md
new file mode 100644
index 00000000..f95c8029
--- /dev/null
+++ b/data/README.md
@@ -0,0 +1,37 @@
+# Data
+
+## Prerequisites
+
+We use `dvc` to manage the data. You can install the required packages by
+installing the benchmarks requirements.
+
+```
+pip install -e .[benchmark]
+```
+
+## Downloading the data
+
+The data is stored in a remote storage on GDrive.
+To download the data, you can run:
+
+```
+cd data
+dvc pull <NAME>
+```
+
+where `<NAME>` is the name of the data set you want to download, e.g.,
+`flame` or `navierstokes`. Omit `<NAME>` to download all data sets.
+
+
+## Data sets
+
+### FLAME
+
+`data/flame` contains the dataset from [2023 FLAME AI
+Challenge](https://www.kaggle.com/competitions/2023-flame-ai-challenge/data).
+
+### Navier-Stokes
+
+`data/navierstokes` contains a part of the dataset linked in
+[neuraloperator/graph-pde](https://github.com/neuraloperator/graph-pde)
+(Zongyi Li et al. 2020).
diff --git a/data/flame.dvc b/data/flame.dvc
new file mode 100644
index 00000000..64dc18cd
--- /dev/null
+++ b/data/flame.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 2e61c8311b09a4fdf29d3ec3527cf629.dir
+  size: 415040265
+  nfiles: 13138
+  path: flame
diff --git a/data/flame/.gitignore b/data/flame/.gitignore
deleted file mode 100644
index 7631a5d2..00000000
--- a/data/flame/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*.csv
-*.dat
diff --git a/data/flame/README.md b/data/flame/README.md
deleted file mode 100644
index ded21e26..00000000
--- a/data/flame/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-Dataset from [2023 FLAME AI
-Challenge](https://www.kaggle.com/competitions/2023-flame-ai-challenge/data).
diff --git a/data/navierstokes.dvc b/data/navierstokes.dvc
new file mode 100644
index 00000000..0d1165c8
--- /dev/null
+++ b/data/navierstokes.dvc
@@ -0,0 +1,6 @@
+outs:
+- md5: 0bb228674e976bab14e9493606e14a27.dir
+  size: 412877192
+  nfiles: 1
+  hash: md5
+  path: navierstokes
diff --git a/docs/benchmarks/.gitignore b/docs/benchmarks/.gitignore
index 247f0b5b..6771891a 100644
--- a/docs/benchmarks/.gitignore
+++ b/docs/benchmarks/.gitignore
@@ -1,3 +1,5 @@
 table.html
 style.css
 img
+*.png
+*.svg
diff --git a/docs/benchmarks/index.md b/docs/benchmarks/index.md
index 2e2be50e..008c3f64 100644
--- a/docs/benchmarks/index.md
+++ b/docs/benchmarks/index.md
@@ -2,6 +2,41 @@ This is an overview of some benchmark results to compare the performance
 of different operator architectures on various problems.
 
 The benchmarks are implemented in the `benchmarks` directory and we refer to
-this directory for more information on how the benchmarks are run.
+this directory for detailed information on how the benchmarks are run.
+
+## [NavierStokes](../api/continuity/benchmarks/#continuity.benchmarks.NavierStokes)
+
+Reference: _Li, Zongyi, et al. "Fourier neural operator for parametric partial
+differential equations." arXiv preprint arXiv:2010.08895 (2020)_ _Table 1 ($\nu$ = 1e−5  T=20  N=1000)_
+
+_reported for_ FNO-3D: __0.1893__ (rel. test error)
+
+[FourierNeuralOperator](../api/continuity/operators/#continuity.operators.FourierNeuralOperator):
+0.0185 (rel. train error)  __0.1841__ (rel. test error)
+
+<table>
+<tr>
+<td>
+Best training sample<br>
+<img src="img/ns_train_237.png" alt="Best training sample"/>
+rel. error = 8.8748e-03
+</td>
+<td>
+Worst training sample<br>
+<img src="img/ns_train_420.png" alt="Worst training sample"/>
+rel. error = 3.1433e-02
+</td>
+<td>
+Best test sample<br>
+<img src="img/ns_test_144.png" alt="Best test sample"/>
+rel. error = 1.0220e-01
+</td>
+<td>
+Worst test sample<br>
+<img src="img/ns_test_179.png" alt="Worst test sample"/>
+rel. error = 4.4655e-01
+</td>
+</tr>
+</table>
 
 {% include 'benchmarks/table.html' %}
diff --git a/pyproject.toml b/pyproject.toml
index 1ab7298c..dd35e401 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,8 +80,11 @@ docs = [
     "pygments",
 ]
 benchmark = [
+    "dvc",
+    "dvc-gdrive",
     "mlflow",
     "optuna>=3.5.0,<4.0.0",
+    "scipy",
 ]
 
 [tool.setuptools.dynamic]
diff --git a/src/continuity/benchmarks/__init__.py b/src/continuity/benchmarks/__init__.py
index 9d85c3da..69809e30 100644
--- a/src/continuity/benchmarks/__init__.py
+++ b/src/continuity/benchmarks/__init__.py
@@ -7,5 +7,6 @@
 from .benchmark import Benchmark
 from .sine import SineRegular, SineUniform
 from .flame import Flame
+from .navierstokes import NavierStokes
 
-__all__ = ["Benchmark", "SineRegular", "SineUniform", "Flame"]
+__all__ = ["Benchmark", "SineRegular", "SineUniform", "Flame", "NavierStokes"]
diff --git a/src/continuity/benchmarks/navierstokes.py b/src/continuity/benchmarks/navierstokes.py
new file mode 100644
index 00000000..1bf32fc9
--- /dev/null
+++ b/src/continuity/benchmarks/navierstokes.py
@@ -0,0 +1,114 @@
+"""
+`continuity.benchmarks.navierstokes`
+
+Navier-Stokes benchmark.
+"""
+
+import torch
+import scipy.io
+import pathlib
+import continuity
+from typing import Optional
+from continuity.benchmarks import Benchmark
+from continuity.operators.losses import RelativeL1Error
+from continuity.data.dataset import OperatorDataset
+
+
+class NavierStokes(Benchmark):
+    r"""Navier-Stokes benchmark.
+
+    This benchmark contains a dataset of turbulent flow samples taken from
+    [neuraloperator/graph-pde](https://github.com/neuraloperator/graph-pde)
+    that was used as illustrative example in the FNO paper:
+
+    _Li, Zongyi, et al. "Fourier neural operator for parametric partial
+    differential equations." arXiv preprint arXiv:2010.08895 (2020)_.
+
+    The dataset loads the `NavierStokes_V1e-5_N1200_T20` file which contains
+    1200 samples of Navier-Stokes flow simulations at a spatial resolution of
+    64x64 and 20 time steps.
+
+    The benchmark exports operator datasets where both input and output function
+    are defined on the space-time domain (periodic in space), i.e.,
+    $(x, y, t) \in [-1, 1] \times [-1, 1] \times (-1, 0]$ for the input
+    function and $(x, y, t) \in [-1, 1] \times [-1, 1] \times (0, 1]$ for
+    the output function.
+
+    The input function is given by the vorticity field at the first ten time
+    steps $(-0.9, -0.8, ..., 0.0)$ and the output function by the vorticity
+    field at the following ten time steps $(0.1, 0.2, ..., 1.0)$.
+
+    ![Visualization of first training sample.](/continuity/benchmarks/navierstokes.png)
+
+    The datasets have the following shapes:
+
+    ```
+        len(benchmark.train_dataset) == 1000
+        len(benchmark.test_dataset) == 200
+
+        x.shape == (64 * 64 * 10, 3)
+        u.shape == (64 * 64 * 10, 1)
+        y.shape == (64 * 64 * 10, 3)
+        v.shape == (64 * 64 * 10, 1)
+    ```
+
+    Args:
+        dir: Path to data set. Default is `data/navierstokes`
+            in the root directory of the repository.
+    """
+
+    def __init__(self, dir: Optional[str] = None):
+        if dir is None:
+            # Get root dir relative to this file
+            root_dir = pathlib.Path(continuity.__file__).parent.parent.parent
+            dir = root_dir / "data" / "navierstokes"
+        else:
+            dir = pathlib.Path(dir)
+
+        # Create space-time grids (x_1, x_2, t)
+        ls = torch.linspace(-1, 1, 64)
+        tx = torch.linspace(-0.9, 0.0, 10)
+        grid_x = torch.meshgrid(ls, ls, tx, indexing="ij")
+        x = torch.stack(grid_x, axis=3).reshape(1, -1, 3).repeat(1200, 1, 1)
+        assert x.shape == (1200, 64 * 64 * 10, 3)
+
+        ty = torch.linspace(0.1, 1.0, 10)
+        grid_y = torch.meshgrid(ls, ls, ty, indexing="ij")
+        y = torch.stack(grid_y, axis=3).reshape(1, -1, 3).repeat(1200, 1, 1)
+        assert y.shape == (1200, 64 * 64 * 10, 3)
+
+        # Load vorticity
+        data = scipy.io.loadmat(dir / "NavierStokes_V1e-5_N1200_T20.mat")
+        vort0 = torch.tensor(data["a"], dtype=torch.float32)
+        vort = torch.tensor(data["u"], dtype=torch.float32)
+        assert vort0.shape == (1200, 64, 64)
+        assert vort.shape == (1200, 64, 64, 20)
+
+        # Input: initial vorticity plus the first 9 snapshots (10 frames)
+        u = torch.cat(
+            (vort0.reshape(-1, 64, 64, 1), vort[:, :, :, :9]),
+            axis=3,
+        ).reshape(1200, 64 * 64 * 10, 1)
+
+        # Output: the last 10 snapshots (index 9 of `vort` is not used)
+        v = vort[:, :, :, 10:].reshape(1200, 64 * 64 * 10, 1)
+
+        # Split train/test
+        train_indices = torch.arange(1000)
+        test_indices = torch.arange(1000, 1200)
+
+        train_dataset = OperatorDataset(
+            x=x[train_indices],
+            u=u[train_indices],
+            y=y[train_indices],
+            v=v[train_indices],
+        )
+
+        test_dataset = OperatorDataset(
+            x=x[test_indices],
+            u=u[test_indices],
+            y=y[test_indices],
+            v=v[test_indices],
+        )
+
+        super().__init__(train_dataset, test_dataset, [RelativeL1Error()])
diff --git a/src/continuity/benchmarks/run/runner.py b/src/continuity/benchmarks/run/runner.py
index 287d27d8..ef3faf34 100644
--- a/src/continuity/benchmarks/run/runner.py
+++ b/src/continuity/benchmarks/run/runner.py
@@ -26,7 +26,7 @@ def run(config: RunConfig, params_dict: Optional[dict] = None) -> float:
 
         """
         # Device
-        device = config.device or get_device()
+        device = torch.device(config.device) if config.device else get_device()
 
         # Rank
         rank = device.index or 0
diff --git a/src/continuity/operators/cnn.py b/src/continuity/operators/cnn.py
index 35c4ecac..40070e17 100644
--- a/src/continuity/operators/cnn.py
+++ b/src/continuity/operators/cnn.py
@@ -5,7 +5,7 @@
 """
 
 import torch
-from typing import Optional
+from typing import Optional, Tuple
 from continuity.operators import Operator
 from continuity.operators.shape import OperatorShapes
 
@@ -20,6 +20,8 @@ class ConvolutionalNeuralNetwork(Operator):
         width: The number hidden channels.
         depth: The number of hidden layers.
         kernel_size: The size of the convolutional kernel.
+        grid_shape: x and y have to be sampled on a grid. If grid dimensions
+            are not specified, a grid with equal sizes is assumed.
         act: Activation function.
         device: Device.
     """
@@ -30,13 +32,15 @@ def __init__(
         width: int = 16,
         depth: int = 3,
         kernel_size: int = 3,
+        grid_shape: Optional[Tuple[int, ...]] = None,
         act: Optional[torch.nn.Module] = None,
         device: Optional[torch.device] = None,
     ):
-        assert depth > 1, "Depth is at least one."
+        assert depth >= 1, "Depth is at least one."
         super().__init__(shapes, device)
 
         self.act = torch.nn.Tanh() if act is None else act
+        self.grid_shape = grid_shape
         padding = kernel_size // 2
 
         assert shapes.x.dim in [1, 2, 3], "Only 1D, 2D, and 3D grids supported."
@@ -70,8 +74,11 @@ def forward(
             The output of the operator, of shape (batch_size, #evaluations, v_dim).
         """
         # Transform input to (batch_size, u_dim, ux, uy, ...)
-        per_dim = int(self.shapes.u.num ** (1 / self.shapes.x.dim))
-        u = u.reshape([-1] + [per_dim] * self.shapes.x.dim + [self.shapes.u.dim])
+        if self.grid_shape is not None:
+            u = u.reshape(-1, *self.grid_shape, self.shapes.u.dim)
+        else:
+            per_dim = int(self.shapes.u.num ** (1 / self.shapes.x.dim))
+            u = u.reshape([-1] + [per_dim] * self.shapes.x.dim + [self.shapes.u.dim])
         u = u.swapaxes(1, -1)
 
         # Convolutional layers
diff --git a/src/continuity/operators/fno.py b/src/continuity/operators/fno.py
index d333b99b..c3d0759f 100644
--- a/src/continuity/operators/fno.py
+++ b/src/continuity/operators/fno.py
@@ -21,8 +21,9 @@ class FourierNeuralOperator(NeuralOperator):
         shapes: Shapes of the input and output data.
         depth: Number of Fourier layers.
         width: Latent dimension of the Fourier layers.
-        act: Activation function. Default is tanh.
+        act: Activation function.
         device: Device.
+        **kwargs: Additional arguments for the Fourier layers.
     """
 
     def __init__(
@@ -32,6 +33,7 @@ def __init__(
         width: int = 3,
         act: Optional[torch.nn.Module] = None,
         device: Optional[torch.device] = None,
+        **kwargs,
     ):
         latent_shapes = OperatorShapes(
             x=shapes.x,
@@ -46,9 +48,11 @@ def __init__(
             v=TensorShape(shapes.v.num, width),
         )
 
-        layers = [
-            FourierLayer(latent_shapes, device=device) for _ in range(depth - 1)
-        ] + [FourierLayer(output_shapes, device=device)]
+        layers = []
+        for _ in range(depth - 1):
+            layers += [FourierLayer(latent_shapes, device=device, **kwargs)]
+        layers += [FourierLayer(output_shapes, device=device, **kwargs)]
+
         layers = torch.nn.ModuleList(layers)
 
         super().__init__(shapes, layers, act, device)
diff --git a/src/continuity/operators/losses.py b/src/continuity/operators/losses.py
index e956ca4b..2b284fc0 100644
--- a/src/continuity/operators/losses.py
+++ b/src/continuity/operators/losses.py
@@ -78,3 +78,41 @@ def __call__(
 
         # Return MSE
         return self.mse(v_pred, v)
+
+
+class RelativeL1Error(Loss):
+    """Computes the relative L1 error between the predicted and true labels.
+
+    ```python
+    loss = l1(v, op(x, u, y)) / l1(v, 0)
+    ```
+    """
+
+    def __init__(self):
+        self.l1 = torch.nn.L1Loss()
+
+    def __call__(
+        self,
+        op: "Operator",
+        x: torch.Tensor,
+        u: torch.Tensor,
+        y: torch.Tensor,
+        v: torch.Tensor,
+    ) -> torch.Tensor:
+        """Evaluate relative L1 error.
+
+        Args:
+            op: Operator object
+            x: Tensor of sensor positions of shape (batch_size, num_sensors, coordinate_dim)
+            u: Tensor of sensor values of shape (batch_size, num_sensors, num_channels)
+            y: Tensor of evaluation coordinates of shape (batch_size, x_size, coordinate_dim)
+            v: Tensor of labels of shape (batch_size, x_size, num_channels)
+        """
+        # Call operator
+        v_pred = op(x, u, y)
+
+        # Align shapes
+        v_pred = v_pred.reshape(v.shape)
+
+        # Return relative L1 error
+        return self.l1(v, v_pred) / self.l1(v, torch.zeros_like(v))
diff --git a/src/continuity/operators/neuraloperator.py b/src/continuity/operators/neuraloperator.py
index d544619a..c23356fa 100644
--- a/src/continuity/operators/neuraloperator.py
+++ b/src/continuity/operators/neuraloperator.py
@@ -25,7 +25,7 @@ class NeuralOperator(Operator):
     Args:
         shapes: Shapes of the input and output data.
         layers: List of operator layers.
-        act: Activation function. Default is tanh.
+        act: Activation function.
         device: Device.
     """
 
@@ -39,7 +39,7 @@ def __init__(
         super().__init__(shapes, device)
 
         self.layers = torch.nn.ModuleList(layers)
-        self.act = act or torch.nn.Tanh()
+        self.act = act or torch.nn.GELU()
 
         self.first_dim = layers[0].shapes.u.dim
         self.last_dim = layers[-1].shapes.v.dim
@@ -57,7 +57,13 @@ def __init__(
         self.W = torch.nn.ModuleList(
             [
                 torch.nn.Linear(layer.shapes.u.dim, layer.shapes.v.dim, device=device)
-                for layer in layers
+                for layer in layers[:-1]
+            ]
+        )
+        self.norms = torch.nn.ModuleList(
+            [
+                torch.nn.LayerNorm(layer.shapes.v.dim, device=device)
+                for layer in layers[:-1]
             ]
         )
 
@@ -82,8 +88,10 @@ def forward(
         v = v.reshape(-1, self.shapes.u.num, self.first_dim)
 
         # Hidden layers
-        for i, layer in enumerate(self.layers[:-1]):
-            v = self.act(layer(x, v, x) + self.W[i](v))
+        for layer, W, norm in zip(self.layers[:-1], self.W, self.norms):
+            v = layer(x, v, x) + W(v)
+            v = self.act(v)
+            v = norm(v)
 
         # Last layer (evaluates y)
         v = self.layers[-1](x, v, y)
diff --git a/src/continuity/trainer/callbacks.py b/src/continuity/trainer/callbacks.py
index 2b36c908..7ed5f522 100644
--- a/src/continuity/trainer/callbacks.py
+++ b/src/continuity/trainer/callbacks.py
@@ -224,7 +224,7 @@ def __call__(self, logs: Logs):
         loss = logs.loss_train
         if logs.loss_test is not None:
             mlflow.log_metric("loss/test", logs.loss_test, step=logs.epoch)
-            loss = logs.loss_train
+            loss = logs.loss_test
 
         # Save best model
         self.best_loss = min(self.best_loss, loss)
diff --git a/src/continuity/trainer/trainer.py b/src/continuity/trainer/trainer.py
index ee123e25..5cac8fca 100644
--- a/src/continuity/trainer/trainer.py
+++ b/src/continuity/trainer/trainer.py
@@ -182,7 +182,6 @@ def fit(
 
         # Train
         loss_train, loss_test, epoch = None, None, 0
-        operator.train()
         for epoch in range(epochs):
             loss_train = 0
 
@@ -197,6 +196,7 @@ def fit(
                 loss_test=loss_test,
             )
 
+            operator.train()
             for xuyv in data_loader:
                 xuyv = [t.to(self.device) for t in xuyv]
 
@@ -218,10 +218,9 @@ def closure(xuyv=xuyv):
                 for callback in callbacks:
                     callback.step(logs)
 
-            loss_train /= len(data_loader)
-
             # Compute test loss
             if test_dataset is not None:
+                operator.eval()
                 loss_test = 0
                 for xuyv in test_data_loader:
                     xuyv = [t.to(self.device) for t in xuyv]
diff --git a/tests/benchmarks/test_navierstokes.py b/tests/benchmarks/test_navierstokes.py
new file mode 100644
index 00000000..4fd97938
--- /dev/null
+++ b/tests/benchmarks/test_navierstokes.py
@@ -0,0 +1,52 @@
+import pytest
+import matplotlib.pyplot as plt
+from pathlib import Path
+from continuity.benchmarks import Benchmark, NavierStokes
+from continuity.data import OperatorDataset
+
+
+def check_data_exists():
+    """Skip the calling test when the `data/navierstokes` directory is absent."""
+    if not (Path("data") / "navierstokes").exists():
+        pytest.skip("Data not available")
+
+
+def test_navierstokes_return_type_correct():
+    check_data_exists()
+    benchmark = NavierStokes()
+    assert isinstance(benchmark.train_dataset, OperatorDataset)
+    assert isinstance(benchmark.test_dataset, OperatorDataset)
+
+
+def test_navierstokes_can_initialize_default():
+    check_data_exists()
+    assert isinstance(NavierStokes(), Benchmark)
+
+
+def test_navierstokes_shapes_and_plot():
+    check_data_exists()
+    benchmark = NavierStokes()
+    assert len(benchmark.train_dataset) == 1000
+    assert len(benchmark.test_dataset) == 200
+    for dataset in [benchmark.train_dataset, benchmark.test_dataset]:
+        for x, u, y, v in dataset:
+            assert x.shape == (64 * 64 * 10, 3)
+            assert u.shape == (64 * 64 * 10, 1)
+            assert y.shape == (64 * 64 * 10, 3)
+            assert v.shape == (64 * 64 * 10, 1)
+
+    fig, axs = plt.subplots(1, 2, subplot_kw={"projection": "3d"}, figsize=(10, 5))
+    x, u, y, v = benchmark.test_dataset[0]
+    axs[0].scatter(x[:, 2], x[:, 0], x[:, 1], s=1, c=u, cmap="jet", alpha=0.7)
+    axs[1].scatter(y[:, 2], y[:, 0], y[:, 1], s=1, c=v, cmap="jet", alpha=0.7)
+    for i in range(2):
+        axs[i].set_xlabel("t")
+        axs[i].set_ylabel("x")
+        axs[i].set_zlabel("y")
+    axs[0].set_title("Input")
+    axs[1].set_title("Output")
+
+    try:
+        fig.savefig("docs/benchmarks/navierstokes.png", dpi=500)
+    except FileNotFoundError:
+        pass