forked from ga4gh/ga4gh-schemas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
metadata.avdl
142 lines (113 loc) · 4.21 KB
/
metadata.avdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
@namespace("org.ga4gh.models")
/**
This protocol defines metadata used in the other GA4GH protocols.
*/
protocol Metadata {
import idl "common.avdl";
/**
An experimental preparation of a `Sample`.

Only `id`, `recordCreateTime` and `recordUpdateTime` are mandatory. Every
nullable field defaults to null and `info` defaults to an empty map, so a
writer may omit any of them.
*/
record Experiment {
  /** The experiment UUID. This is globally unique. */
  string id;

  /** The name of the experiment. */
  union { null, string } name = null;

  /** A description of the experiment. */
  union { null, string } description = null;

  /**
  The time at which this record was created.
  Format: ISO 8601, YYYY-MM-DDTHH:MM:SS.SSS (e.g. 2015-02-10T00:03:42.123Z)
  */
  string recordCreateTime;

  /**
  The time at which this record was last updated.
  Format: ISO 8601, YYYY-MM-DDTHH:MM:SS.SSS (e.g. 2015-02-10T00:03:42.123Z)
  */
  string recordUpdateTime;

  /**
  The time at which this experiment was performed.
  Granularity here is variable (e.g. date only).
  Format: ISO 8601, YYYY-MM-DDTHH:MM:SS (e.g. 2015-02-10T00:03:42)
  */
  union { null, string } runTime = null;

  /**
  The molecule examined in this experiment. (e.g. genomic DNA, total RNA)
  */
  union { null, string } molecule = null;

  /**
  The experiment technique or strategy applied to the sample.
  (e.g. whole genome sequencing, RNA-seq, RIP-seq)
  */
  union { null, string } strategy = null;

  /**
  The method used to enrich the target. (e.g. immunoprecipitation, size
  fractionation, MNase digestion)
  */
  union { null, string } selection = null;

  /** The name of the library used as part of this experiment. */
  union { null, string } library = null;

  /** The configuration of sequenced reads. (e.g. Single or Paired) */
  union { null, string } libraryLayout = null;

  /**
  The instrument model used as part of this experiment.
  This maps to sequencing technology in BAM.
  Defaults to null, like the other nullable fields of this record.
  */
  union { null, string } instrumentModel = null;

  /**
  The data file generated by the instrument.
  TODO: This isn't actually a file is it?
  Should this be `instrumentData` instead?
  */
  union { null, string } instrumentDataFile = null;

  /**
  The sequencing center used as part of this experiment.
  Defaults to null, like the other nullable fields of this record.
  */
  union { null, string } sequencingCenter = null;

  /**
  The platform unit used as part of this experiment. This is a flowcell-barcode
  or slide unique identifier.
  */
  union { null, string } platformUnit = null;

  /**
  A map of additional experiment information. Each key maps to a list of
  string values; the map is empty by default.
  */
  map<array<string>> info = {};
}
/**
NOTE: there's ongoing discussion about changing the role of Dataset, possibly as follows:
Represents a group of contextually related data objects (e.g. all Individuals, Samples, and
Experiments associated with a particular feature, or a trio in genetic diagnostics).
This concept may be expanded in the future (ontology for describing the type of dataset ...).
TODO: Determination of scope, structure, specific attributes, e.g. limiting to single
record type - see http://purl.obolibrary.org/obo/IAO_0000100 - and providing alternative mechanism
for heterogeneous data with external contextualization, e.g. all records of different
types associated with a clinical study.
*/
/**
A Dataset is a data-provider-specified collection of related data of multiple types.
Logically, it's akin to a folder -- it's up to the provider what goes into the folder.

For server implementors, datasets are a useful level of granularity for implementing
administrative features such as access control (e.g. Dataset X is public;
Dataset Y is only available to lab Z's collaborators) and billing (e.g. the costs
of hosting Dataset Y should be charged to lab Z).

For data curators, datasets are 'the simplest thing that could possibly work' for
grouping data (e.g. Dataset X has all the reads, variants, and expression levels for a
particular research project; Dataset Y has all the work product from a particular
grant).

For data accessors, datasets are a simple way to scope exploration and analysis
(e.g. are there any supporting examples in 1000genomes?
what's the distribution of that result in the data from our project?)

The record itself carries only identifying and descriptive fields: `id`, `name`
and `description`.
*/
record Dataset {
/**
The dataset's id, (at least) locally unique.
Mandatory: this field has no default value.
*/
string id;
/**
The name of the dataset.
Optional: defaults to null.
*/
union { null, string } name = null;
/**
Additional, human-readable information on the dataset.
Optional: defaults to null.
*/
union { null, string } description = null;
}
}