diff --git a/README.md b/README.md
index eea7abe..019b536 100644
--- a/README.md
+++ b/README.md
@@ -115,12 +115,12 @@ Arguments:
-| Name | Type | Description |
-|:---------------------------------|:-------|:---------------------------------|
-| `--input` | `file` | A subset of the common dataset. |
+| Name | Type | Description |
+|:---|:---|:---|
+| `--input` | `file` | A subset of the common dataset. |
| `--output_unintegrated_censored` | `file` | (*Output*) Unintegrated dataset. |
-| `--output_unintegrated` | `file` | (*Output*) Unintegrated dataset. |
-| `--output_validation` | `file` | (*Output*) Validation dataset. |
+| `--output_unintegrated` | `file` | (*Output*) Unintegrated dataset. |
+| `--output_validation` | `file` | (*Output*) Hold-out dataset for validation. |
@@ -216,11 +216,18 @@ Data structure:
## File format: Validation
-Validation dataset
+Hold-out dataset for validation.
Example file:
`resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad`
+Description:
+
+Samples that were held out and will later be used only to assess whether
+the batch integration was successful. E.g. a donor from batch 2 was
+corrected towards batch 1, and the same donor was also actually measured
+in batch 1 (without that measurement being used as input to the algorithm).
+
Format:
@@ -283,11 +290,11 @@ Arguments:
-| Name | Type | Description |
-|:-----------------------|:-------|:-------------------------------|
-| `--input_unintegrated` | `file` | Unintegrated dataset. |
-| `--input_validation` | `file` | Validation dataset. |
-| `--output` | `file` | (*Output*) Integrated dataset. |
+| Name | Type | Description |
+|:-----------------------|:-------|:---------------------------------|
+| `--input_unintegrated` | `file` | Unintegrated dataset. |
+| `--input_validation` | `file` | Hold-out dataset for validation. |
+| `--output` | `file` | (*Output*) Integrated dataset. |
@@ -301,7 +308,7 @@ Arguments:
| Name | Type | Description |
|:---|:---|:---|
-| `--input_validation` | `file` | Validation dataset. |
+| `--input_validation` | `file` | Hold-out dataset for validation. |
| `--input_unintegrated` | `file` | Unintegrated dataset. |
| `--input_integrated` | `file` | Integrated dataset. |
| `--output` | `file` | (*Output*) File indicating the score of a metric. |
@@ -320,8 +327,8 @@ Format:
AnnData object
- obs: 'label_pred'
- uns: 'dataset_id', 'normalization_id', 'method_id'
+ layers: 'integrated'
+ uns: 'dataset_id', 'method_id', 'parameters'
@@ -329,12 +336,12 @@ Data structure:
-| Slot | Type | Description |
-|:--------------------------|:---------|:-------------------------------------|
-| `obs["label_pred"]` | `string` | Predicted labels for the test cells. |
-| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
-| `uns["normalization_id"]` | `string` | Which normalization was used. |
-| `uns["method_id"]` | `string` | A unique identifier for the method. |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `layers["integrated"]` | `double` | The integrated data as returned by a batch correction method. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["method_id"]` | `string` | A unique identifier for the method. |
+| `uns["parameters"]` | `object` | (*Optional*) The parameters used for the integration. |
diff --git a/src/api/file_integrated.yaml b/src/api/file_integrated.yaml
index 94d9061..b2c6a34 100644
--- a/src/api/file_integrated.yaml
+++ b/src/api/file_integrated.yaml
@@ -1,4 +1,3 @@
-#TODO: Change to the required and/or optional fields of the anndata
type: file
example: "resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/prediction.h5ad"
label: Integrated
@@ -6,21 +5,21 @@ summary: "Integrated dataset"
info:
format:
type: h5ad
- obs:
- - type: string
- name: label_pred
- description: Predicted labels for the test cells.
+ layers:
+ - type: double
+ name: integrated
+ description: The integrated data as returned by a batch correction method
required: true
uns:
- type: string
name: dataset_id
description: "A unique identifier for the dataset"
required: true
- - type: string
- name: normalization_id
- description: "Which normalization was used"
- required: true
- type: string
name: method_id
description: "A unique identifier for the method"
- required: true
\ No newline at end of file
+ required: true
+ - type: object
+ name: parameters
+ description: "The parameters used for the integration"
+ required: false
diff --git a/src/api/file_validation.yaml b/src/api/file_validation.yaml
index 00b95ba..ddbd980 100644
--- a/src/api/file_validation.yaml
+++ b/src/api/file_validation.yaml
@@ -1,8 +1,12 @@
-#TODO: Change to the required and/or optional fields of the anndata
type: file
example: "resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad"
-label: "Validation"
-summary: "Validation dataset"
+label: Validation
+summary: Hold-out dataset for validation.
+description: |
+ Samples that were held out and will later be used only to assess whether
+ the batch integration was successful. E.g. a donor from batch 2 was corrected towards batch 1,
+ and the same donor was also actually measured in batch 1 (without that measurement being used as input to the algorithm).
+
info:
format:
type: h5ad