-
Notifications
You must be signed in to change notification settings - Fork 21
/
kb_uploadmethods.spec
628 lines (539 loc) · 20.9 KB
/
kb_uploadmethods.spec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
/*
A KBase module: kb_uploadmethods
*/
module kb_uploadmethods {
/* Workspace name of the object. */
typedef string workspace_name;
/*
Indicates true or false values, false = 0, true = 1
@range [0,1]
*/
typedef int boolean;
/* Single-end fastq file name, or forward/left paired-end fastq file name, from the user's staging area. */
typedef string fwd_staging_file_name;
/* Reverse/right paired-end fastq file name from the user's staging area. */
typedef string rev_staging_file_name;
/* Download type for a web source file: 'Direct Download', 'FTP', 'DropBox' or 'Google Drive'. */
typedef string download_type;
/* Single-end fastq file URL, or forward/left paired-end fastq file URL. */
typedef string fwd_file_url;
/* Reverse/right paired-end fastq file URL. */
typedef string rev_file_url;
/* Sequencing technology. */
typedef string sequencing_tech;
/* Output reads file name. */
typedef string name;
/* Whether the reads are from a single genome or a metagenome. Declared as a string, not as boolean. */
typedef string single_genome;
/* Whether the reads are interleaved. Declared as a string, not as boolean. */
typedef string interleaved;
/* Mean (average) insert length. Declared as a string, not as a number. */
typedef string insert_size_mean;
/* Standard deviation of insert lengths. Declared as a string, not as a number. */
typedef string insert_size_std_dev;
/* Whether reads in a pair point outward. Declared as a string, not as boolean. */
typedef string read_orientation_outward;
/* Reference to an object; used in result structures and for the genome input of FileToPhenotypeSetParams. */
typedef string obj_ref;
/* Report name returned in the result structures. */
typedef string report_name;
/* Report reference returned in the result structures. */
typedef string report_ref;
/*
One entry of the urls_to_add parameter group of UploadMethodParams:
the fastq URL(s), the output reads name and the per-entry reads options.
Every field is a string alias.
fwd_file_url: single-end fastq file URL or forward/left paired-end fastq file URL
rev_file_url: reverse/right paired-end fastq file URL
name: output reads file name
single_genome: whether the reads are from a single genome or a metagenome
interleaved: whether the reads are interleaved
insert_size_mean: mean (average) insert length
insert_size_std_dev: standard deviation of insert lengths
read_orientation_outward: whether reads in a pair point outward
*/
typedef structure {
fwd_file_url fwd_file_url;
rev_file_url rev_file_url;
name name;
single_genome single_genome;
interleaved interleaved;
insert_size_mean insert_size_mean;
insert_size_std_dev insert_size_std_dev;
read_orientation_outward read_orientation_outward;
} urls_to_add;
/*
Input parameters for the "upload_fastq_file" function.
sequencing_tech: sequencing technology
name: output reads file name
workspace_name: workspace name/ID of the object
For files in user's staging area:
fwd_staging_file_name: single-end fastq file name or forward/left paired-end fastq file name from user's staging area
rev_staging_file_name: reverse/right paired-end fastq file name from user's staging area
For files from web:
download_type: download type for web source fastq file ('Direct Download', 'FTP', 'DropBox', 'Google Drive')
fwd_file_url: single-end fastq file URL or forward/left paired-end fastq file URL
rev_file_url: reverse/right paired-end fastq file URL
urls_to_add: used for parameter-groups. dict of {fwd_file_url, rev_file_url, name,
single_genome, interleaved, insert_size_mean, insert_size_std_dev and read_orientation_outward}
Optional Params:
single_genome: whether the reads are from a single genome or a metagenome.
interleaved: whether the reads are interleaved
insert_size_mean: mean (average) insert length
insert_size_std_dev: standard deviation of insert lengths
read_orientation_outward: whether reads in a pair point outward
*/
typedef structure {
workspace_name workspace_name;
fwd_staging_file_name fwd_staging_file_name;
rev_staging_file_name rev_staging_file_name;
download_type download_type;
fwd_file_url fwd_file_url;
rev_file_url rev_file_url;
sequencing_tech sequencing_tech;
name name;
urls_to_add urls_to_add;
single_genome single_genome;
interleaved interleaved;
insert_size_mean insert_size_mean;
insert_size_std_dev insert_size_std_dev;
read_orientation_outward read_orientation_outward;
} UploadMethodParams;
/*
Result returned by upload_fastq_file and by most of the import_*_from_staging functions.
obj_ref: object reference (presumably the uploaded/imported object - TODO confirm)
report_name: report name
report_ref: report reference
*/
typedef structure {
obj_ref obj_ref;
report_name report_name;
report_ref report_ref;
} UploadMethodResult;
/*
Upload fastq reads, either from the user's staging area or from the web
(see UploadMethodParams). Returns an UploadMethodResult. Requires authentication.
*/
funcdef upload_fastq_file(UploadMethodParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters for the "upload_fasta_gff_file" function.
Required:
genome_name: output genome object name
workspace_name: workspace name/ID of the object
For staging area:
fasta_file: fasta file containing assembled contigs/chromosomes
gff_file: gff file containing predicted gene models and corresponding features
Optional params:
scientific_name: the scientific name of the genome.
taxon_id: the numeric ID of the NCBI taxon to which this genome belongs.
If defined, will try to link the Genome to the specified
taxonomy id in lieu of performing the lookup during upload
source: Source Of The GFF File. Default to 'User'
taxon_wsname: where the reference taxons are. Default to 'ReferenceTaxons'
release: Release Or Version Of The Source Data
genetic_code: Genetic Code For The Organism
type: 'Reference', 'User upload', 'Representative'
Not described in the original documentation (TODO document):
genome_type
generate_missing_genes (presumably generates gene features for CDSs that
have no parent in the file, as described for GenbankToGenomeParams - TODO confirm)
*/
typedef structure {
string fasta_file;
string gff_file;
string genome_name;
workspace_name workspace_name;
string genome_type;
string scientific_name;
string source;
string taxon_wsname;
string taxon_id;
string release;
int genetic_code;
string type;
string generate_missing_genes;
} UploadFastaGFFMethodParams;
/*
Result of the "upload_fasta_gff_file" function.
genome_ref: genome reference (presumably the saved Genome object - TODO confirm)
genome_info: genome information, carried as a string
report_name: report name
report_ref: report reference
*/
typedef structure {
string genome_ref;
string genome_info;
report_name report_name;
report_ref report_ref;
} UploadFastaGFFMethodResult;
/*
Upload a genome from a fasta file plus a gff file (see UploadFastaGFFMethodParams).
Returns an UploadFastaGFFMethodResult. Requires authentication.
*/
funcdef upload_fasta_gff_file(UploadFastaGFFMethodParams params)
returns (UploadFastaGFFMethodResult returnVal) authentication required;
/*
Input parameters for the "upload_metagenome_fasta_gff_file" function.
Required:
genome_name: output metagenome object name
workspace_name: workspace name/ID of the object
For staging area:
fasta_file: fasta file containing assembled contigs/chromosomes
gff_file: gff file containing predicted gene models and corresponding features
Optional params:
source: Source Of The GFF File. Default to 'User'
taxon_wsname: where the reference taxons are. Default to 'ReferenceTaxons'
taxon_id: if defined, will try to link the Genome to the specified
taxonomy id in lieu of performing the lookup during upload
release: Release Or Version Of The Source Data
genetic_code: Genetic Code For The Organism
type: 'Reference', 'User upload', 'Representative'
Not described in the original documentation (TODO document):
generate_missing_genes (presumably generates gene features for CDSs that
have no parent in the file, as described for GenbankToGenomeParams - TODO confirm)
*/
typedef structure {
string fasta_file;
string gff_file;
string genome_name;
workspace_name workspace_name;
string source;
string taxon_wsname;
string taxon_id;
string release;
int genetic_code;
string type;
string generate_missing_genes;
} UploadMetagenomeFastaGFFMethodParams;
/*
Result of the "upload_metagenome_fasta_gff_file" function.
metagenome_ref: metagenome reference (presumably the saved metagenome object - TODO confirm)
metagenome_info: metagenome information, carried as a string
report_name: report name
report_ref: report reference
*/
typedef structure {
string metagenome_ref;
string metagenome_info;
report_name report_name;
report_ref report_ref;
} UploadMetagenomeFastaGFFMethodResult;
/*
Upload a metagenome from a fasta file plus a gff file
(see UploadMetagenomeFastaGFFMethodParams).
Returns an UploadMetagenomeFastaGFFMethodResult. Requires authentication.
*/
funcdef upload_metagenome_fasta_gff_file(UploadMetagenomeFastaGFFMethodParams params)
returns (UploadMetagenomeFastaGFFMethodResult returnVal) authentication required;
/*
Input parameters for the "batch_import_genomes_from_staging" function.
NOTE(review): the field descriptions below are inferred from the field names and
from the other parameter structures in this module - confirm against the implementation.
staging_subdir: presumably the staging area subdirectory holding the files to import
genome_set_name: presumably the name of the output genome set
workspace_name: workspace name/ID of the object
genome_type: TODO document
source: presumably the source of the files, as in UploadFastaGFFMethodParams
taxon_wsname: presumably where the reference taxons are
taxon_id: presumably the taxonomy id to link the genomes to
release: presumably the release or version of the source data
genetic_code: presumably the genetic code for the organism
generate_missing_genes: presumably whether to generate gene features for CDSs without a parent
*/
typedef structure {
string staging_subdir;
string genome_set_name;
workspace_name workspace_name;
string genome_type;
string source;
string taxon_wsname;
string taxon_id;
string release;
int genetic_code;
string generate_missing_genes;
} BatchGenomeImporterParams;
/*
Result shared by the "batch_import_genomes_from_staging" and
"batch_import_assemblies_from_staging" functions.
set_ref: set reference (presumably the created genome/assembly set object - TODO confirm)
report_name: report name
report_ref: report reference
*/
typedef structure {
string set_ref;
report_name report_name;
report_ref report_ref;
} BatchImporterResult;
/*
Batch import of genomes from the staging area (see BatchGenomeImporterParams).
Returns a BatchImporterResult. Requires authentication.
*/
funcdef batch_import_genomes_from_staging(BatchGenomeImporterParams params)
returns (BatchImporterResult returnVal) authentication required;
/*
Input parameters for the "batch_import_assemblies_from_staging" function.
NOTE(review): the field descriptions below are inferred from the field names - confirm
against the implementation.
staging_subdir: presumably the staging area subdirectory holding the files to import
assembly_set_name: presumably the name of the output assembly set
workspace_name: workspace name/ID of the object
min_contig_length: presumably the minimum contig length to keep - TODO confirm
type: TODO document
*/
typedef structure {
string staging_subdir;
string assembly_set_name;
workspace_name workspace_name;
int min_contig_length;
string type;
} BatchAssemblyImporterParams;
/*
Batch import of assemblies from the staging area (see BatchAssemblyImporterParams).
Returns a BatchImporterResult. Requires authentication.
*/
funcdef batch_import_assemblies_from_staging(BatchAssemblyImporterParams params)
returns (BatchImporterResult returnVal) authentication required;
/* Input parameters for the "unpack_staging_file" function.
Required parameters:
staging_file_subdir_path: file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
staging_file_subdir_path is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
staging_file_subdir_path is subdir_1/subdir_2/file_name
workspace_name: workspace name/ID of the object
*/
typedef structure {
workspace_name workspace_name;
string staging_file_subdir_path;
}UnpackStagingFileParams;
/* Results from the "unpack_staging_file" function.
unpacked_file_path: unpacked file path(s) in the staging area, carried as a single string
*/
typedef structure {
string unpacked_file_path;
}UnpackStagingFileOutput;
/*
Unpack a staging area file (see UnpackStagingFileParams).
Returns an UnpackStagingFileOutput holding the unpacked file path(s).
Requires authentication.
*/
funcdef unpack_staging_file(UnpackStagingFileParams params)
returns(UnpackStagingFileOutput returnVal) authentication required;
/*
One entry of the urls_to_add_web_unpack parameter group of UnpackWebFileParams.
file_url: file URL
*/
typedef structure{
string file_url;
}urls_to_add_web_unpack;
/* Input parameters for the "unpack_web_file" function.
Required parameters:
workspace_name: workspace name/ID of the object
file_url: file URL
download_type: one of ['Direct Download', 'FTP', 'DropBox', 'Google Drive']
Optional:
urls_to_add_web_unpack: used for parameter-groups. dict of {file_url}
*/
typedef structure {
workspace_name workspace_name;
string file_url;
string download_type;
urls_to_add_web_unpack urls_to_add_web_unpack;
}UnpackWebFileParams;
/* Results from the "unpack_web_file" function.
unpacked_file_path: unpacked file path(s) in the staging area, carried as a single string
*/
typedef structure {
string unpacked_file_path;
}UnpackWebFileOutput;
/*
Download and unpack a web file to the staging area (see UnpackWebFileParams).
Returns an UnpackWebFileOutput holding the unpacked file path(s).
Requires authentication.
*/
funcdef unpack_web_file(UnpackWebFileParams params)
returns(UnpackWebFileOutput returnVal) authentication required;
/*
Input parameters for the "import_genbank_from_staging" function.
import_genbank_from_staging: wrapper method for GenomeFileUtil.genbank_to_genome
required params:
staging_file_subdir_path - file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
staging_file_subdir_path is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
staging_file_subdir_path is subdir_1/subdir_2/file_name
genome_name - becomes the name of the object
workspace_name - the name of the workspace it gets saved to
(declared here as a plain string rather than the workspace_name alias)
source - Source of the file, typically something like RefSeq or Ensembl
optional params:
scientific_name - the scientific name of the genome.
taxon_id - the numeric ID of the NCBI taxon to which this genome belongs.
If defined, will try to link the Genome to the specified
taxonomy id in lieu of performing the lookup during upload
release - Release or version number of the data,
for example Ensembl has numbered releases of all their data: Release 31
generate_ids_if_needed - If the field used for the feature id is not there,
generate ids (default behavior is raising an exception)
generate_missing_genes - Generate gene features for CDSs that do not have
a parent in the file
genetic_code - Genetic code of the organism. Overwrites the GC determined from
the taxon object
type - Reference, Representative or User upload
genome_type - not described in the original documentation (TODO document)
*/
typedef structure {
string staging_file_subdir_path;
string genome_name;
string workspace_name;
string source;
string genome_type;
string release;
int genetic_code;
string type;
string scientific_name;
string taxon_id;
string generate_ids_if_needed;
string generate_missing_genes;
} GenbankToGenomeParams;
/*
Result of the "import_genbank_from_staging" function.
genome_ref: genome reference (presumably the saved Genome object - TODO confirm)
*/
typedef structure {
string genome_ref;
} GenomeSaveResult;
/*
Import a genbank file from the staging area as a genome (see GenbankToGenomeParams).
Returns a GenomeSaveResult. Requires authentication.
*/
funcdef import_genbank_from_staging(GenbankToGenomeParams params)
returns (GenomeSaveResult returnVal) authentication required;
/*
Input parameters for the "import_sra_from_staging" function.
required params:
staging_file_subdir_path: file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
staging_file_subdir_path is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
staging_file_subdir_path is subdir_1/subdir_2/file_name
sequencing_tech: sequencing technology
name: output reads file name
workspace_name: workspace name/ID of the object
Optional Params:
single_genome: whether the reads are from a single genome or a metagenome.
insert_size_mean: mean (average) insert length
insert_size_std_dev: standard deviation of insert lengths
read_orientation_outward: whether reads in a pair point outward
*/
typedef structure {
string staging_file_subdir_path;
sequencing_tech sequencing_tech;
name name;
workspace_name workspace_name;
single_genome single_genome;
insert_size_mean insert_size_mean;
insert_size_std_dev insert_size_std_dev;
read_orientation_outward read_orientation_outward;
} SRAToReadsParams;
/*
Import an SRA file from the staging area as reads (see SRAToReadsParams).
Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_sra_from_staging(SRAToReadsParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
One entry of the sra_urls_to_add parameter group of WebSRAToReadsParams.
required params:
file_url: SRA file URL
sequencing_tech: sequencing technology
name: output reads file name
Optional Params:
single_genome: whether the reads are from a single genome or a metagenome.
insert_size_mean: mean (average) insert length
insert_size_std_dev: standard deviation of insert lengths
read_orientation_outward: whether reads in a pair point outward
*/
typedef structure {
string file_url;
sequencing_tech sequencing_tech;
name name;
single_genome single_genome;
insert_size_mean insert_size_mean;
insert_size_std_dev insert_size_std_dev;
read_orientation_outward read_orientation_outward;
}sra_urls_to_add;
/*
Input parameters for the "import_sra_from_web" function.
download_type: download type for web source file
('Direct Download', 'FTP', 'DropBox', 'Google Drive')
sra_urls_to_add: dict of SRA file URLs and their reads options
(see the sra_urls_to_add structure)
workspace_name: workspace name/ID of the object
*/
typedef structure {
string download_type;
sra_urls_to_add sra_urls_to_add;
workspace_name workspace_name;
} WebSRAToReadsParams;
/*
Result of the "import_sra_from_web" function.
obj_refs: list of object references (presumably one per imported reads object - TODO confirm)
report_name: report name
report_ref: report reference
*/
typedef structure {
list<string> obj_refs;
report_name report_name;
report_ref report_ref;
} WebSRAToReadsResult;
/*
Import SRA file(s) from web URLs as reads (see WebSRAToReadsParams).
Returns a WebSRAToReadsResult. Requires authentication.
*/
funcdef import_sra_from_web(WebSRAToReadsParams params)
returns (WebSRAToReadsResult returnVal) authentication required;
/*
Input parameters for the "import_fasta_as_assembly_from_staging" function.
required params:
staging_file_subdir_path: file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
staging_file_subdir_path is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
staging_file_subdir_path is subdir_1/subdir_2/file_name
assembly_name: output Assembly file name
workspace_name: workspace name/ID of the object
Not described in the original documentation (TODO document):
min_contig_length (presumably the minimum contig length to keep - TODO confirm)
type
*/
typedef structure {
string staging_file_subdir_path;
string assembly_name;
workspace_name workspace_name;
int min_contig_length;
string type;
} FastaToAssemblyParams;
/*
Import a fasta file from the staging area as an Assembly (see FastaToAssemblyParams).
Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_fasta_as_assembly_from_staging(FastaToAssemblyParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters shared by the "import_tsv_as_media_from_staging",
"import_excel_as_media_from_staging" and
"import_tsv_or_excel_as_media_from_staging" functions.
required params:
staging_file_subdir_path: file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
staging_file_subdir_path is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
staging_file_subdir_path is subdir_1/subdir_2/file_name
media_name: output Media file name
workspace_name: workspace name/ID of the object
*/
typedef structure {
string staging_file_subdir_path;
string media_name;
workspace_name workspace_name;
} FileToMediaParams;
/*
Import a tsv file from the staging area as Media (see FileToMediaParams).
Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_tsv_as_media_from_staging(FileToMediaParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Import an excel file from the staging area as Media (see FileToMediaParams).
Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_excel_as_media_from_staging(FileToMediaParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Import a tsv or excel file from the staging area as Media (see FileToMediaParams).
Returns an UploadMethodResult. Requires authentication.
NOTE(review): how the file format is chosen is not visible in this spec.
*/
funcdef import_tsv_or_excel_as_media_from_staging(FileToMediaParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters for the "import_file_as_fba_model_from_staging" function.
required params:
model_file: model file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
model_file is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
model_file is subdir_1/subdir_2/file_name
compounds_file: same as above for the compounds file (only used for tsv)
file_type: one of "tsv", "excel", "sbml"
genome: the associated species genome
biomass: one or more biomass reactions in the model, carried as a single string
model_name: output FBAModel object name
workspace_name: workspace name/ID of the object
*/
typedef structure {
string model_file;
string compounds_file;
string file_type;
string genome;
string biomass;
string model_name;
workspace_name workspace_name;
} FileToFBAModelParams;
/*
Import a tsv, excel or sbml file from the staging area as an FBAModel
(see FileToFBAModelParams). Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_file_as_fba_model_from_staging(FileToFBAModelParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters for the "import_tsv_as_expression_matrix_from_staging" function.
required params:
staging_file_subdir_path: file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
staging_file_subdir_path is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
staging_file_subdir_path is subdir_1/subdir_2/file_name
matrix_name: output Expression Matrix file name
workspace_name: workspace name/ID of the object
genome_ref: optional reference to a Genome object that will be
used for mapping feature IDs
(NOTE(review): the original sentence ended with a dangling "to";
state the mapping target once confirmed)
fill_missing_values: optional flag for filling in missing
values in matrix (default value is false)
data_type: optional field, value is one of 'untransformed',
'log2_level', 'log10_level', 'log2_ratio', 'log10_ratio' or
'unknown' (last one is default value)
data_scale: optional parameter (default value is '1.0')
*/
typedef structure {
string staging_file_subdir_path;
workspace_name workspace_name;
string matrix_name;
string genome_ref;
boolean fill_missing_values;
string data_type;
string data_scale;
} FileToMatrixParams;
/*
Import a tsv file from the staging area as an Expression Matrix
(see FileToMatrixParams). Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_tsv_as_expression_matrix_from_staging(FileToMatrixParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters for the "import_reads_from_staging" function.
sequencing_tech: sequencing technology
name: output reads file name
workspace_name: workspace name/ID of the object
import_type: either FASTQ or SRA
For files in user's staging area:
fastq_fwd_staging_file_name: single-end fastq file name or forward/left paired-end fastq file name from user's staging area
fastq_rev_staging_file_name: reverse/right paired-end fastq file name from user's staging area
sra_staging_file_name: SRA file name from user's staging area
e.g.
for file: /data/bulk/user_name/file_name
the staging file name is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
the staging file name is subdir_1/subdir_2/file_name
Optional Params:
single_genome: whether the reads are from a single genome or a metagenome.
interleaved: whether the reads are interleaved
insert_size_mean: mean (average) insert length
insert_size_std_dev: standard deviation of insert lengths
read_orientation_outward: whether reads in a pair point outward
*/
typedef structure {
string import_type;
string fastq_fwd_staging_file_name;
string fastq_rev_staging_file_name;
string sra_staging_file_name;
sequencing_tech sequencing_tech;
workspace_name workspace_name;
string name;
single_genome single_genome;
interleaved interleaved;
insert_size_mean insert_size_mean;
insert_size_std_dev insert_size_std_dev;
read_orientation_outward read_orientation_outward;
} UploadReadsParams;
/*
Import FASTQ or SRA reads from the staging area, selected by import_type
(see UploadReadsParams). Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_reads_from_staging(UploadReadsParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters for the "import_tsv_as_phenotype_set_from_staging" function.
required params:
staging_file_subdir_path: file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
staging_file_subdir_path is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
staging_file_subdir_path is subdir_1/subdir_2/file_name
phenotype_set_name: output PhenotypeSet object name
workspace_name: workspace name/ID of the object
optional:
genome: Genome object that contains features referenced by the Phenotype Set
*/
typedef structure {
string staging_file_subdir_path;
workspace_name workspace_name;
string phenotype_set_name;
obj_ref genome;
} FileToPhenotypeSetParams;
/*
Import a tsv file from the staging area as a PhenotypeSet
(see FileToPhenotypeSetParams). Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_tsv_as_phenotype_set_from_staging(FileToPhenotypeSetParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters for the "import_attribute_mapping_from_staging" function.
required params:
staging_file_subdir_path: file path relative to the user's staging directory
e.g.
for file: /data/bulk/user_name/file_name
staging_file_subdir_path is file_name
for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
staging_file_subdir_path is subdir_1/subdir_2/file_name
attribute_mapping_name: output ConditionSet object name
workspace_name: workspace name/ID of the object
*/
typedef structure {
string staging_file_subdir_path;
workspace_name workspace_name;
string attribute_mapping_name;
} FileToConditionSetParams;
/*
Import an attribute mapping file from the staging area
(see FileToConditionSetParams). Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_attribute_mapping_from_staging(FileToConditionSetParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters for the "import_eschermap_from_staging" function.
NOTE(review): the field descriptions below are inferred from the field names and
from the other parameter structures in this module - confirm against the implementation.
staging_file_subdir_path: presumably the file path relative to the user's staging directory
workspace_id: integer workspace ID; unlike most structures in this module,
the workspace is identified by a numeric ID rather than by workspace_name
escher_map_name: presumably the output Escher map object name
*/
typedef structure {
string staging_file_subdir_path;
int workspace_id;
string escher_map_name;
} EscherMapParams;
/*
Import an Escher map file from the staging area (see EscherMapParams).
Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_eschermap_from_staging(EscherMapParams params)
returns (UploadMethodResult returnVal) authentication required;
/*
Input parameters for the "import_fasta_as_seqset_from_staging" function.
NOTE(review): the field descriptions below are inferred from the field names and
from the other parameter structures in this module - confirm against the implementation.
staging_file_subdir_path: presumably the file path relative to the user's staging directory
workspace_name: workspace name (declared here as a plain string rather than
the workspace_name alias)
genome_refs: list of genome references
description: presumably a free-text description of the output object
lookup_gene_matches: integer flag - TODO document
min_similarity_threshold: float threshold - TODO document range and meaning
filter_nomatch: integer flag - TODO document
seqset_name: presumably the output sequence set object name
*/
typedef structure {
string staging_file_subdir_path;
string workspace_name;
list<string> genome_refs;
string description;
int lookup_gene_matches;
float min_similarity_threshold;
int filter_nomatch;
string seqset_name;
} FASTAToSeqSetParams;
/*
Import a fasta file from the staging area as a sequence set
(see FASTAToSeqSetParams). Returns an UploadMethodResult. Requires authentication.
*/
funcdef import_fasta_as_seqset_from_staging(FASTAToSeqSetParams params)
returns (UploadMethodResult returnVal) authentication required;
};