forked from ENCODE-DCC/atac-seq-pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
atac.wdl
3024 lines (2821 loc) · 127 KB
/
atac.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
version 1.0
# Groups three runtime-environment settings (Docker, Singularity, Conda) into
# a single value made of three String members.
# NOTE(review): the member names match the workflow inputs `docker`,
# `singularity` and `conda`; where this struct is built and consumed is not
# visible in this part of the file -- confirm against the task definitions.
struct RuntimeEnvironment {
# presumably a Docker image URI, like the workflow input `docker` -- TODO confirm
String docker
# presumably a Singularity image URI, like the workflow input `singularity` -- TODO confirm
String singularity
# presumably a Conda environment name, like the workflow input `conda` -- TODO confirm
String conda
}
workflow atac {
String pipeline_ver = 'v2.2.1'
meta {
version: 'v2.2.1'
author: 'Jin wook Lee'
email: '[email protected]'
description: 'ENCODE ATAC-Seq pipeline. See https://github.com/ENCODE-DCC/atac-seq-pipeline for more details. e.g. example input JSON for Terra/Anvil.'
organization: 'ENCODE DCC'
specification_document: 'https://docs.google.com/document/d/1f0Cm4vRyDQDu0bMehHD7P7KOMxTOP-HiNoIvL1VcBt8/edit?usp=sharing'
default_docker: 'encodedcc/atac-seq-pipeline:v2.2.1'
default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/atac-seq-pipeline_v2.2.1.sif'
default_conda: 'encd-atac'
croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/atac.croo.v5.json'
parameter_group: {
runtime_environment: {
title: 'Runtime environment',
description: 'Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.'
},
pipeline_metadata: {
title: 'Pipeline metadata',
description: 'Metadata for a pipeline (e.g. title and description).'
},
reference_genome: {
title: 'Reference genome',
description: 'Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.',
help: 'Choose one atac.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.'
},
input_genomic_data: {
title: 'Input genomic data',
description: 'Genomic input files for experiment.',
help: 'Pipeline can start with any type of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. atac.fastqs_rep1_R1 and atac.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. atac.bams: ["rep1.bam", "rep2.bam"]. Define sequencing endedness with atac.paired_end; if you have mixed SE and PE replicates then define atac.paired_ends instead for each replicate. e.g. atac.paired_ends: [false, true].'
},
adapter_trimming: {
title: 'Adapter trimming',
description: 'Parameters for adapter trimming.',
help: 'Use atac.auto_detect_adapter to automatically detect/trim 3 adapters (Illumina: AGATCGGAAGAGC, Nextera: CTGTCTCTTATA, smallRNA: TGGAATTCTCGG) or manually define adapter sequence to be trimmed (atac.adapter or atac.adapters_repX_RY). Leave all parameters undefined/empty if your FASTQs are already trimmed.'
},
pipeline_parameter: {
title: 'Pipeline parameter',
description: 'Pipeline type and flags to turn on/off analyses.',
help: 'Pipeline can run in DNase-seq mode. The only difference is TN5-shifting of reads in ATAC-seq mode. Use atac.align_only to align FASTQs without peak calling.'
},
alignment: {
title: 'Alignment',
description: 'Parameters for alignment.',
help: 'Pipeline calculates mitochondrial fraction of reads in raw BAM. But after that it filters out mitochondrial reads (e.g. chrM, MT) from NODUP_BAMs (filtered/deduped). It is controlled by atac.filter_chrs array. If you want to keep mitochondrial reads then make this array empty.'
},
peak_calling: {
title: 'Peak calling',
description: 'Parameters for peak calling.',
help: 'This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR.'
},
resource_parameter: {
title: 'Resource parameter',
description: 'Number of CPUs (threads), max. memory and walltime for tasks.',
help: 'Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (atac.*_time_hr) is only used for cluster engines. Other tasks default to use 1 CPU and 4GB of memory.'
}
}
}
input {
# group: runtime_environment
String docker = 'encodedcc/atac-seq-pipeline:v2.2.1'
String singularity = 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/atac-seq-pipeline_v2.2.1.sif'
String conda = 'encd-atac'
String conda_macs2 = 'encd-atac-macs2'
String conda_spp = 'encd-atac-spp'
String conda_python2 = 'encd-atac-py2'
# group: pipeline_metadata
String title = 'Untitled'
String description = 'No description'
# group: reference_genome
File? genome_tsv
String? genome_name
File? ref_fa
File? ref_mito_fa
File? bowtie2_idx_tar
File? bowtie2_mito_idx_tar
File? chrsz
File? blacklist
File? blacklist2
String? mito_chr_name
String? regex_bfilt_peak_chr_name
String? gensz
File? tss
File? dnase
File? prom
File? enh
File? reg2map
File? reg2map_bed
File? roadmap_meta
# group: input_genomic_data
Boolean? paired_end
Array[Boolean] paired_ends = []
Array[File] fastqs_rep1_R1 = []
Array[File] fastqs_rep1_R2 = []
Array[File] fastqs_rep2_R1 = []
Array[File] fastqs_rep2_R2 = []
Array[File] fastqs_rep3_R1 = []
Array[File] fastqs_rep3_R2 = []
Array[File] fastqs_rep4_R1 = []
Array[File] fastqs_rep4_R2 = []
Array[File] fastqs_rep5_R1 = []
Array[File] fastqs_rep5_R2 = []
Array[File] fastqs_rep6_R1 = []
Array[File] fastqs_rep6_R2 = []
Array[File] fastqs_rep7_R1 = []
Array[File] fastqs_rep7_R2 = []
Array[File] fastqs_rep8_R1 = []
Array[File] fastqs_rep8_R2 = []
Array[File] fastqs_rep9_R1 = []
Array[File] fastqs_rep9_R2 = []
Array[File] fastqs_rep10_R1 = []
Array[File] fastqs_rep10_R2 = []
Array[File] bams = []
Array[File] nodup_bams = []
Array[File] tas = []
Array[File] peaks = []
Array[File] peaks_pr1 = []
Array[File] peaks_pr2 = []
File? peak_pooled
File? peak_ppr1
File? peak_ppr2
# group: pipeline_parameter
String pipeline_type = 'atac'
Boolean align_only = false
Boolean true_rep_only = false
Boolean enable_xcor = false
Boolean enable_count_signal_track = false
Boolean enable_idr = true
Boolean enable_preseq = false
Boolean enable_fraglen_stat = true
Boolean enable_tss_enrich = true
Boolean enable_annot_enrich = true
Boolean enable_jsd = true
Boolean enable_compare_to_roadmap = false
Boolean enable_gc_bias = true
# group: adapter_trimming
String cutadapt_param = '-e 0.1 -m 5'
Boolean auto_detect_adapter = false
String? adapter
Array[String] adapters_rep1_R1 = []
Array[String] adapters_rep1_R2 = []
Array[String] adapters_rep2_R1 = []
Array[String] adapters_rep2_R2 = []
Array[String] adapters_rep3_R1 = []
Array[String] adapters_rep3_R2 = []
Array[String] adapters_rep4_R1 = []
Array[String] adapters_rep4_R2 = []
Array[String] adapters_rep5_R1 = []
Array[String] adapters_rep5_R2 = []
Array[String] adapters_rep6_R1 = []
Array[String] adapters_rep6_R2 = []
Array[String] adapters_rep7_R1 = []
Array[String] adapters_rep7_R2 = []
Array[String] adapters_rep8_R1 = []
Array[String] adapters_rep8_R2 = []
Array[String] adapters_rep9_R1 = []
Array[String] adapters_rep9_R2 = []
Array[String] adapters_rep10_R1 = []
Array[String] adapters_rep10_R2 = []
# group: alignment
Int multimapping = 4
String dup_marker = 'picard'
Boolean no_dup_removal = false
Int mapq_thresh = 30
Array[String] filter_chrs = ['chrM', 'MT']
Int subsample_reads = 0
Int xcor_subsample_reads = 25000000
Array[Int?] read_len = []
Int pseudoreplication_random_seed = 0
# group: peak_calling
Int cap_num_peak = 300000
Float pval_thresh = 0.01
Int smooth_win = 150
Float idr_thresh = 0.05
# group: resource_parameter
Int align_cpu = 6
Float align_mem_factor = 0.15
Int align_time_hr = 48
Float align_disk_factor = 8.0
Int filter_cpu = 4
Float filter_mem_factor = 0.4
Int filter_time_hr = 24
Float filter_disk_factor = 8.0
Int bam2ta_cpu = 2
Float bam2ta_mem_factor = 0.3
Int bam2ta_time_hr = 12
Float bam2ta_disk_factor = 4.0
Float spr_mem_factor = 20.0
Float spr_disk_factor = 30.0
Int jsd_cpu = 4
Float jsd_mem_factor = 0.1
Int jsd_time_hr = 12
Float jsd_disk_factor = 2.0
Int xcor_cpu = 2
Float xcor_mem_factor = 1.0
Int xcor_time_hr = 6
Float xcor_disk_factor = 4.5
Int call_peak_cpu = 2
Float call_peak_mem_factor = 4.0
Int call_peak_time_hr = 24
Float call_peak_disk_factor = 30.0
Float macs2_signal_track_mem_factor = 12.0
Int macs2_signal_track_time_hr = 24
Float macs2_signal_track_disk_factor = 80.0
Float preseq_mem_factor = 0.5
Float preseq_disk_factor = 5.0
String? filter_picard_java_heap
String? preseq_picard_java_heap
String? fraglen_stat_picard_java_heap
String? gc_bias_picard_java_heap
}
parameter_meta {
docker: {
description: 'Default Docker image URI to run WDL tasks.',
group: 'runtime_environment',
example: 'ubuntu:20.04'
}
singularity: {
description: 'Default Singularity image URI to run WDL tasks. For Singularity users only.',
group: 'runtime_environment',
example: 'docker://ubuntu:20.04'
}
conda: {
description: 'Default Conda environment name to run WDL tasks. For Conda users only.',
group: 'runtime_environment',
example: 'encd-atac'
}
conda_macs2: {
description: 'Conda environment name for task macs2. For Conda users only.',
group: 'runtime_environment',
example: 'encd-atac-macs2'
}
conda_spp: {
description: 'Conda environment name for tasks spp/xcor. For Conda users only.',
group: 'runtime_environment',
example: 'encd-atac-spp'
}
conda_python2: {
description: 'Conda environment name for tasks with python2 wrappers (tss_enrich). For Conda users only.',
group: 'runtime_environment',
example: 'encd-atac-py2'
}
title: {
description: 'Experiment title.',
group: 'pipeline_metadata',
example: 'ENCSR356KRQ (subsampled 1/400)'
}
description: {
description: 'Experiment description.',
group: 'pipeline_metadata',
example: 'ATAC-seq on primary keratinocytes in day 0.0 of differentiation (subsampled 1/400)'
}
genome_tsv: {
description: 'Reference genome database TSV.',
group: 'reference_genome',
help: 'This TSV file includes all genome specific parameters (e.g. reference FASTA, bowtie2 index). You can still individually define any parameters in it. Parameters defined in input JSON will override those defined in genome TSV.',
example: 'https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv'
}
genome_name: {
description: 'Genome name.',
group: 'reference_genome'
}
ref_fa: {
description: 'Reference FASTA file.',
group: 'reference_genome'
}
ref_mito_fa: {
description: 'Reference FASTA file (mitochondrial reads only).',
group: 'reference_genome'
}
bowtie2_idx_tar: {
description: 'Bowtie2 index TAR file.',
group: 'reference_genome'
}
bowtie2_mito_idx_tar: {
description: 'Bowtie2 index TAR file (mitochondrial reads only).',
group: 'reference_genome'
}
chrsz: {
description: '2-col chromosome sizes file.',
group: 'reference_genome'
}
blacklist: {
description: 'Blacklist file in BED format.',
group: 'reference_genome',
help: 'Peaks will be filtered with this file.'
}
blacklist2: {
description: 'Secondary blacklist file in BED format.',
group: 'reference_genome',
help: 'If it is defined, it will be merged with atac.blacklist. Peaks will be filtered with merged blacklist.'
}
mito_chr_name: {
description: 'Mitochondrial chromosome name.',
group: 'reference_genome',
help: 'e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in "filter" task.'
}
regex_bfilt_peak_chr_name: {
description: 'Reg-ex for chromosomes to keep while filtering peaks.',
group: 'reference_genome',
help: 'Chromosomes defined here will be kept. All other chromosomes will be filtered out in .bfilt. peak file. This is done along with blacklist filtering peak file.'
}
gensz: {
description: 'Genome sizes. "hs" for human, "mm" for mouse or sum of 2nd column in chromosome sizes file.',
group: 'reference_genome'
}
tss: {
description: 'TSS file in BED format.',
group: 'reference_genome'
}
dnase: {
description: 'Open chromatin regions file in BED format.',
group: 'reference_genome'
}
prom: {
description: 'Promoter regions file in BED format.',
group: 'reference_genome'
}
enh: {
description: 'Enhancer regions file in BED format.',
group: 'reference_genome'
}
reg2map: {
description: 'Cell type signals file.',
group: 'reference_genome'
}
reg2map_bed: {
description: 'File of regions used to generate reg2map signals.',
group: 'reference_genome'
}
roadmap_meta: {
description: 'Roadmap metadata.',
group: 'reference_genome'
}
paired_end: {
description: 'Sequencing endedness.',
group: 'input_genomic_data',
help: 'Setting this on means that all replicates are paired ended. For mixed samples, use atac.paired_ends array instead.',
example: true
}
paired_ends: {
description: 'Sequencing endedness array (for mixed SE/PE datasets).',
group: 'input_genomic_data',
help: 'Whether each biological replicate is paired ended or not.'
}
fastqs_rep1_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 1.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type and fill parameters for that type and leave others undefined. Especially for FASTQs, we have an individual variable for each biological replicate so that FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.',
example: [
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz"
]
}
fastqs_rep1_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 1.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
example: [
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair2/ENCFF248EJF.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair2/ENCFF368TYI.subsampled.400.fastq.gz"
]
}
fastqs_rep2_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 2.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
example: [
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair1/ENCFF641SFZ.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair1/ENCFF751XTV.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair1/ENCFF927LSG.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair1/ENCFF859BDM.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair1/ENCFF193RRC.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair1/ENCFF366DFI.subsampled.400.fastq.gz"
]
}
fastqs_rep2_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 2.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
example: [
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair2/ENCFF031ARQ.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair2/ENCFF590SYZ.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair2/ENCFF734PEQ.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair2/ENCFF007USV.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair2/ENCFF886FSC.subsampled.400.fastq.gz",
"https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep2/pair2/ENCFF573UXK.subsampled.400.fastq.gz"
]
}
fastqs_rep3_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 3.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep3_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 3.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep4_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 4.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep4_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 4.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep5_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 5.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep5_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 5.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep6_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 6.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep6_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 6.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep7_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 7.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep7_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 7.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep8_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 8.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep8_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 8.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep9_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 9.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep9_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 9.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep10_R1: {
description: 'Read1 FASTQs to be merged for a biological replicate 10.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read2 FASTQs (atac.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.'
}
fastqs_rep10_R2: {
description: 'Read2 FASTQs to be merged for a biological replicate 10.',
group: 'input_genomic_data',
help: 'Make sure that they are consistent with read1 FASTQs (atac.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.'
}
bams: {
description: 'List of unfiltered/raw BAM files for each biological replicate.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].'
}
nodup_bams: {
description: 'List of filtered/deduped BAM files for each biological replicate',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each biological replicate. e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].'
}
tas: {
description: 'List of TAG-ALIGN files for each biological replicate.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each biological replicate. e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].'
}
peaks: {
description: 'List of NARROWPEAK files (not blacklist filtered) for each biological replicate.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from PEAK files. Each entry for each biological replicate. e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. atac.peaks_pr1, atac.peak_pooled) according to your flag settings (e.g. atac.true_rep_only) and number of replicates. If you have more than one replicate then define atac.peak_pooled, atac.peak_ppr1 and atac.peak_ppr2. If atac.true_rep_only flag is on then do not define any parameters (atac.peaks_pr1, atac.peaks_pr2, atac.peak_ppr1 and atac.peak_ppr2) related to pseudo replicates.'
}
peaks_pr1: {
description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from PEAK files. Define if atac.true_rep_only flag is off.'
}
peaks_pr2: {
description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from PEAK files. Define if atac.true_rep_only flag is off.'
}
peak_pooled: {
description: 'NARROWPEAK file for pooled true replicate.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.'
}
peak_ppr1: {
description: 'NARROWPEAK file for pooled pseudo replicate 1.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and atac.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.'
}
peak_ppr2: {
description: 'NARROWPEAK file for pooled pseudo replicate 2.',
group: 'input_genomic_data',
help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and atac.true_rep_only flag is off. PPR2 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.'
}
pipeline_type: {
description: 'Pipeline type. atac for ATAC-Seq or dnase for DNase-Seq.',
group: 'pipeline_parameter',
help: 'The only difference of two types is that TN5 shifting of TAG-ALIGN is done for atac. TAG-ALIGN is in 6-col BED format. It is a simplified version of BAM.',
choices: ['atac', 'dnase'],
example: 'atac'
}
align_only: {
description: 'Align only mode.',
group: 'pipeline_parameter',
help: 'Reads will be aligned but there will be no peak-calling on them.'
}
true_rep_only: {
description: 'Disables all analyses related to pseudo-replicates.',
group: 'pipeline_parameter',
help: 'Pipeline generates 2 pseudo-replicates from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).'
}
enable_xcor: {
description: 'Enables cross-correlation analysis.',
group: 'pipeline_parameter',
help: 'Generates cross-correlation plot.'
}
enable_count_signal_track: {
description: 'Enables generation of count signal tracks.',
group: 'pipeline_parameter'
}
enable_idr: {
description: 'Enables IDR on MACS2 NARROWPEAKs.',
group: 'pipeline_parameter'
}
enable_preseq: {
description: 'Enables preseq analysis.',
group: 'pipeline_parameter'
}
enable_fraglen_stat: {
description: 'Enables calculation of fragment length distribution/statistics.',
group: 'pipeline_parameter'
}
enable_tss_enrich: {
description: 'Enables TSS enrichment plot generation.',
group: 'pipeline_parameter'
}
enable_annot_enrich: {
description: 'Enables annotated regions enrichment analysis.',
group: 'pipeline_parameter'
}
enable_jsd: {
description: 'Enables Jensen-Shannon Distance (JSD) plot generation.',
group: 'pipeline_parameter'
}
enable_compare_to_roadmap: {
description: 'Enables comparison to Roadmap.',
group: 'pipeline_parameter'
}
enable_gc_bias: {
description: 'Enables GC bias calculation.',
group: 'pipeline_parameter'
}
cutadapt_param: {
description: 'Parameters for cutadapt.',
group: 'adapter_trimming',
help: 'It is -e 0.1 -m 5 by default (err_rate=0.1, min_trim_len=5). You can define any parameters that cutadapt supports.'
}
auto_detect_adapter: {
description: 'Auto-detect/trim adapter sequences.',
group: 'adapter_trimming',
help: 'Can detect/trim three types of adapter sequences. Illumina: AGATCGGAAGAGC, Nextera: CTGTCTCTTATA, smallRNA: TGGAATTCTCGG.',
example: true
}
adapter: {
description: 'Adapter for all FASTQs.',
group: 'adapter_trimming',
help: 'Define if all FASTQs have the same adapter sequence. Otherwise define adapter sequence for individual FASTQ in atac.adapters_repX_R1 and atac.adapters_repX_R2 instead. Use atac.auto_detect_adapter if you want to detect adapters automatically. If all of your FASTQs are already trimmed then leave all adapter-related parameters undefined/empty.'
}
adapters_rep1_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 1.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep1_R2). You can combine this with atac.auto_detect_adapter. Pipeline will auto-detect/trim adapter sequences for null entry in this list. e.g. ["AAGGCCTT", null, "AAGGCCTT"].'
}
adapters_rep1_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 1.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep1_R1).'
}
adapters_rep2_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 2.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep2_R2).'
}
adapters_rep2_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 2.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep2_R1).'
}
adapters_rep3_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 3.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep3_R2).'
}
adapters_rep3_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 3.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep3_R1).'
}
adapters_rep4_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 4.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep4_R2).'
}
adapters_rep4_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 4.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep4_R1).'
}
adapters_rep5_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 5.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep5_R2).'
}
adapters_rep5_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 5.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep5_R1).'
}
adapters_rep6_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 6.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep6_R2).'
}
adapters_rep6_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 6.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep6_R1).'
}
adapters_rep7_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 7.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep7_R2).'
}
adapters_rep7_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 7.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep7_R1).'
}
adapters_rep8_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 8.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep8_R2).'
}
adapters_rep8_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 8.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep8_R1).'
}
adapters_rep9_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 9.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep9_R2).'
}
adapters_rep9_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 9.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep9_R1).'
}
adapters_rep10_R1: {
description: 'Adapter sequences for read1 FASTQs to be merged for a biological replicate 10.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read2 FASTQs (atac.adapters_rep10_R2).'
}
adapters_rep10_R2: {
description: 'Adapter sequences for read2 FASTQs to be merged for a biological replicate 10.',
group: 'adapter_trimming',
help: 'Make sure that they are consistent with read1 FASTQs (atac.adapters_rep10_R1).'
}
multimapping: {
description: 'Number of multimappers.',
group: 'alignment',
help: 'It is 4 by default. Set it to 0 if your sample does not have multimappers.'
}
dup_marker: {
description: 'Marker for duplicate reads. picard or sambamba.',
group: 'alignment',
help: 'picard for Picard MarkDuplicates or sambamba for sambamba markdup.'
}
no_dup_removal: {
description: 'Disable removal of duplicate reads during filtering BAM.',
group: 'alignment',
help: 'Duplicate reads are filtered out during filtering BAMs to generate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have .nodup. suffix in its filename.'
}
mapq_thresh: {
description: 'Threshold for low MAPQ reads removal.',
group: 'alignment',
help: 'Low MAPQ reads are filtered out while filtering BAM.'
}
filter_chrs: {
description: 'List of chromosomes to be filtered out while filtering BAM.',
group: 'alignment',
help: 'It is ["chrM", "MT"] by default. Therefore, mitochondrial reads will be filtered out while filtering. Make it empty if you want to keep all reads.'
}
subsample_reads: {
description: 'Subsample reads. Shuffle and subsample reads.',
group: 'alignment',
help: 'This affects all downstream analyses after filtering BAM. (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.'
}
xcor_subsample_reads: {
description: 'Subsample reads for cross-correlation analysis only.',
group: 'alignment',
help: 'This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.'
}
read_len: {
description: 'Read length per biological replicate.',
group: 'alignment',
help: 'Pipeline can estimate read length from FASTQs. If you start the pipeline from a type other than FASTQ (BAM, NODUP_BAM, TA, ...), then provide this for analyses that require read length (e.g. TSS enrichment plot).'
}
pseudoreplication_random_seed: {
description: 'Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then split it into two).',
group: 'alignment',
help: 'Pseudo-replication (task spr) is done by using GNU "shuf --random-source=sha256(random_seed)". If this parameter == 0, then pipeline uses input TAG-ALIGN file\'s size (in bytes) for the random_seed.'
}
cap_num_peak: {
description: 'Upper limit on the number of peaks.',
group: 'peak_calling',
help: 'Called peaks will be sorted in descending order of score and the number of peaks will be capped at this number by taking first N peaks.'
}
pval_thresh: {
description: 'p-value Threshold for MACS2 peak caller.',
group: 'peak_calling',
help: 'macs2 callpeak -p'
}
smooth_win: {
description: 'Size of smoothing windows for MACS2 peak caller.',
group: 'peak_calling',
help: 'This will be used for both generating MACS2 peaks/signal tracks.'
}
idr_thresh: {
description: 'IDR threshold.',
group: 'peak_calling'
}
align_cpu: {
description: 'Number of cores for task align.',
group: 'resource_parameter',
help: 'Task align merges/crops/maps FASTQs.'
}
align_mem_factor: {
description: 'Multiplication factor to determine memory required for task align.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
align_time_hr: {
description: 'Walltime (h) required for task align.',
group: 'resource_parameter',
help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
}
align_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task align.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.'
}
filter_cpu: {
description: 'Number of cores for task filter.',
group: 'resource_parameter',
help: 'Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.'
}
filter_mem_factor: {
description: 'Multiplication factor to determine memory required for task filter.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
filter_time_hr: {
description: 'Walltime (h) required for task filter.',
group: 'resource_parameter',
help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
}
filter_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task filter.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of BAMs to determine required disk size of instance on GCP/AWS.'
}
bam2ta_cpu: {
description: 'Number of cores for task bam2ta.',
group: 'resource_parameter',
help: 'Task bam2ta converts filtered/deduped BAM in to TAG-ALIGN (6-col BED) format.'
}
bam2ta_mem_factor: {
description: 'Multiplication factor to determine memory required for task bam2ta.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
bam2ta_time_hr: {
description: 'Walltime (h) required for task bam2ta.',
group: 'resource_parameter',
help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
}
bam2ta_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task bam2ta.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
}
spr_mem_factor: {
description: 'Multiplication factor to determine memory required for task spr.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
spr_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task spr.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
}
jsd_cpu: {
description: 'Number of cores for task jsd.',
group: 'resource_parameter',
help: 'Task jsd plots Jensen-Shannon distance and metrics related to it.'
}
jsd_mem_factor: {
description: 'Multiplication factor to determine memory required for task jsd.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
jsd_time_hr: {
description: 'Walltime (h) required for task jsd.',
group: 'resource_parameter',
help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
}
jsd_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task jsd.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
}
xcor_cpu: {
description: 'Number of cores for task xcor.',
group: 'resource_parameter',
help: 'Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.'
}
xcor_mem_factor: {
description: 'Multiplication factor to determine memory required for task xcor.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
xcor_time_hr: {
description: 'Walltime (h) required for task xcor.',
group: 'resource_parameter',
help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
}
xcor_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task xcor.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
}
call_peak_cpu: {
description: 'Number of cores for task call_peak. MACS2 is single-thread. No more than 2 is required.',
group: 'resource_parameter',
help: 'Task call_peak call peaks on TAG-ALIGNs by using MACS2 peak caller.'
}
call_peak_mem_factor: {
description: 'Multiplication factor to determine memory required for task call_peak.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
call_peak_time_hr: {
description: 'Walltime (h) required for task call_peak.',
group: 'resource_parameter',
help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
}
call_peak_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task call_peak.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
}
macs2_signal_track_mem_factor: {
description: 'Multiplication factor to determine memory required for task macs2_signal_track.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
macs2_signal_track_time_hr: {
description: 'Walltime (h) required for task macs2_signal_track.',
group: 'resource_parameter',
help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
}
macs2_signal_track_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task macs2_signal_track.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
}
preseq_mem_factor: {
description: 'Multiplication factor to determine memory required for task preseq.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
}
preseq_disk_factor: {
description: 'Multiplication factor to determine persistent disk size for task preseq.',
group: 'resource_parameter',
help: 'This factor will be multiplied to the size of BAMs to determine required disk size of instance on GCP/AWS.'
}
filter_picard_java_heap: {
description: 'Maximum Java heap (java -Xmx) in task filter.',
group: 'resource_parameter',
help: 'Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task\'s memory will be used.'
}
preseq_picard_java_heap: {
description: 'Maximum Java heap (java -Xmx) in task preseq.',
group: 'resource_parameter',
help: 'Maximum memory for Picard tools EstimateLibraryComplexity. If not defined, 90% of preseq task\'s memory will be used.'
}
fraglen_stat_picard_java_heap: {
description: 'Maximum Java heap (java -Xmx) in task fraglen_stat_pe (for paired end replicate only).',
group: 'resource_parameter',
help: 'Maximum memory for Picard tools CollectInsertSizeMetrics. If not defined, 90% of fraglen_stat task\'s memory will be used.'
}
gc_bias_picard_java_heap: {
description: 'Maximum Java heap (java -Xmx) in task gc_bias.',
group: 'resource_parameter',
help: 'Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task\'s memory will be used.'
}
}
# Runtime environment bundles.
# Each RuntimeEnvironment groups the docker / singularity / conda settings
# under the keys 'docker', 'singularity' and 'conda'.
# All four variants below share the same docker and singularity values and
# differ only in the value used for the 'conda' key.
# NOTE(review): docker, singularity, conda, conda_spp, conda_macs2 and
# conda_python2 are presumably workflow inputs declared earlier - confirm.

# default environment (uses conda)
RuntimeEnvironment runtime_environment = {
'docker': docker, 'singularity': singularity, 'conda': conda
}
# same as the default, but 'conda' is set to conda_spp
RuntimeEnvironment runtime_environment_spp = {
'docker': docker, 'singularity': singularity, 'conda': conda_spp
}
# same as the default, but 'conda' is set to conda_macs2
RuntimeEnvironment runtime_environment_macs2 = {
'docker': docker, 'singularity': singularity, 'conda': conda_macs2
}
# same as the default, but 'conda' is set to conda_python2
RuntimeEnvironment runtime_environment_python2 = {
'docker': docker, 'singularity': singularity, 'conda': conda_python2
}
# Hard-coded choices: these are plain declarations with literal values,
# not values read from workflow inputs.
String aligner = 'bowtie2'
String peak_caller = 'macs2'
String peak_type = 'narrowPeak'
# read genome data and paths
# read_genome_tsv is called only when genome_tsv is defined; because the call
# sits inside a conditional, every read_genome_tsv.* output referenced below
# is optional and is undefined when the call is skipped.
if ( defined(genome_tsv) ) {
call read_genome_tsv { input:
genome_tsv = genome_tsv,
runtime_environment = runtime_environment
}
}
# Resolve each required genome value. select_first returns the first defined
# entry, so an explicitly provided value (listed first) takes precedence over
# the one read from the genome TSV. select_first fails if both are undefined,
# so each of these must come from at least one of the two sources.
File ref_fa_ = select_first([ref_fa, read_genome_tsv.ref_fa])
File bowtie2_idx_tar_ = select_first([bowtie2_idx_tar, read_genome_tsv.bowtie2_idx_tar])
File bowtie2_mito_idx_tar_ = select_first([bowtie2_mito_idx_tar, read_genome_tsv.bowtie2_mito_idx_tar])
File chrsz_ = select_first([chrsz, read_genome_tsv.chrsz])
String gensz_ = select_first([gensz, read_genome_tsv.gensz])
# Blacklists are optional (File?): use the explicitly provided value when it
# is defined, otherwise fall back to the genome TSV value, which may itself
# be undefined. select_first is not used here because having no blacklist
# at all is allowed.
File? blacklist1_ = if defined(blacklist) then blacklist
else read_genome_tsv.blacklist
File? blacklist2_ = if defined(blacklist2) then blacklist2
else read_genome_tsv.blacklist2
# merge multiple blacklists
# two blacklists can have different number of columns (3 vs 6)
# so we limit merged blacklist's columns to 3
# select_all drops undefined entries, so this array holds 0, 1 or 2 files.
Array[File] blacklists = select_all([blacklist1_, blacklist2_])