diff --git a/README.md b/README.md
index eea7abe..019b536 100644
--- a/README.md
+++ b/README.md
@@ -115,12 +115,12 @@ Arguments:
 
 <div class="small">
 
-| Name                             | Type   | Description                      |
-|:---------------------------------|:-------|:---------------------------------|
-| `--input`                        | `file` | A subset of the common dataset.  |
+| Name | Type | Description |
+|:---|:---|:---|
+| `--input` | `file` | A subset of the common dataset. |
 | `--output_unintegrated_censored` | `file` | (*Output*) Unintegrated dataset. |
-| `--output_unintegrated`          | `file` | (*Output*) Unintegrated dataset. |
-| `--output_validation`            | `file` | (*Output*) Validation dataset.   |
+| `--output_unintegrated` | `file` | (*Output*) Unintegrated dataset. |
+| `--output_validation` | `file` | (*Output*) Hold-out dataset for validation. |
 
 </div>
 
@@ -216,11 +216,18 @@ Data structure:
 
 ## File format: Validation
 
-Validation dataset
+Hold-out dataset for validation.
 
 Example file:
 `resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad`
 
+Description:
+
+Samples that were held out and will later be used only to assess whether
+the batch integration was successful. For example, a donor from batch 2
+that was corrected towards batch 1 but was also actually measured in
+batch 1 (without being used as input to the algorithm).
+
 Format:
 
 <div class="small">
@@ -283,11 +290,11 @@ Arguments:
 
 <div class="small">
 
-| Name                   | Type   | Description                    |
-|:-----------------------|:-------|:-------------------------------|
-| `--input_unintegrated` | `file` | Unintegrated dataset.          |
-| `--input_validation`   | `file` | Validation dataset.            |
-| `--output`             | `file` | (*Output*) Integrated dataset. |
+| Name                   | Type   | Description                      |
+|:-----------------------|:-------|:---------------------------------|
+| `--input_unintegrated` | `file` | Unintegrated dataset.            |
+| `--input_validation`   | `file` | Hold-out dataset for validation. |
+| `--output`             | `file` | (*Output*) Integrated dataset.   |
 
 </div>
 
@@ -301,7 +308,7 @@ Arguments:
 
 | Name | Type | Description |
 |:---|:---|:---|
-| `--input_validation` | `file` | Validation dataset. |
+| `--input_validation` | `file` | Hold-out dataset for validation. |
 | `--input_unintegrated` | `file` | Unintegrated dataset. |
 | `--input_integrated` | `file` | Integrated dataset. |
 | `--output` | `file` | (*Output*) File indicating the score of a metric. |
@@ -320,8 +327,8 @@ Format:
 <div class="small">
 
     AnnData object
-     obs: 'label_pred'
-     uns: 'dataset_id', 'normalization_id', 'method_id'
+     layers: 'integrated'
+     uns: 'dataset_id', 'method_id', 'parameters'
 
 </div>
 
@@ -329,12 +336,12 @@ Data structure:
 
 <div class="small">
 
-| Slot                      | Type     | Description                          |
-|:--------------------------|:---------|:-------------------------------------|
-| `obs["label_pred"]`       | `string` | Predicted labels for the test cells. |
-| `uns["dataset_id"]`       | `string` | A unique identifier for the dataset. |
-| `uns["normalization_id"]` | `string` | Which normalization was used.        |
-| `uns["method_id"]`        | `string` | A unique identifier for the method.  |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `layers["integrated"]` | `double` | The integrated data as returned by a batch correction method. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["method_id"]` | `string` | A unique identifier for the method. |
+| `uns["parameters"]` | `object` | (*Optional*) The parameters used for the integration. |
 
 </div>
 
diff --git a/src/api/file_integrated.yaml b/src/api/file_integrated.yaml
index 94d9061..b2c6a34 100644
--- a/src/api/file_integrated.yaml
+++ b/src/api/file_integrated.yaml
@@ -1,4 +1,3 @@
-#TODO: Change to the required and/or optional fields of the anndata
 type: file
 example: "resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/prediction.h5ad"
 label: Integrated
@@ -6,21 +5,21 @@ summary: "Integrated dataset"
 info:
   format:
     type: h5ad
-    obs:
-      - type: string
-        name: label_pred
-        description: Predicted labels for the test cells.
+    layers:
+      - type: double
+        name: integrated
+        description: The integrated data as returned by a batch correction method
         required: true
     uns:
       - type: string
         name: dataset_id
         description: "A unique identifier for the dataset"
         required: true
-      - type: string
-        name: normalization_id
-        description: "Which normalization was used"
-        required: true
       - type: string
         name: method_id
         description: "A unique identifier for the method"
-        required: true
\ No newline at end of file
+        required: true
+      - type: object
+        name: parameters
+        description: "The parameters used for the integration"
+        required: false
diff --git a/src/api/file_validation.yaml b/src/api/file_validation.yaml
index 00b95ba..ddbd980 100644
--- a/src/api/file_validation.yaml
+++ b/src/api/file_validation.yaml
@@ -1,8 +1,12 @@
-#TODO: Change to the required and/or optional fields of the anndata
 type: file
 example: "resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad"
-label: "Validation"
-summary: "Validation dataset"
+label: Validation
+summary: Hold-out dataset for validation.
+description: |
+  Samples that were held out and will later be used only to assess whether
+  the batch integration was successful. For example, a donor from batch 2 that was corrected towards batch 1
+  but was also actually measured in batch 1 (without being used as input to the algorithm).
+
 info:
   format:
     type: h5ad