forked from BU-ISCIII/nextflow-scif
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
172 lines (137 loc) · 4.41 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/*
========================================================================================
N E X T F L O W B A S I C W O R K F L O W
========================================================================================
#### Homepage / Documentation
https://github.com/BU-ISCIII/nextflow-scif
#### Authors
S. Monzon <[email protected]>
## Based on nf-core pipelines
# https://github.com/nf-core
----------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------
Pipeline overview:
- 1: BWA indexing of reference genome.
- 2: BWA mapping against indexed genome.
- 3: Samtools sorting and indexing.
- 4: Variant calling using bcftools
----------------------------------------------------------------------------------------
*/
/*
 * Logs the pipeline usage banner at INFO level.
 * The banner interpolates the script-level `version` variable, so `version`
 * has to be assigned before this function is called.
 */
def helpMessage() {
    // The text is kept flush-left so the logged output is exactly what is written here.
    def usage = """
=========================================
BU-ISCIII/nextflow-scif DEMO PIPELINE v${version}
=========================================
Usage:
The typical command for running the pipeline is as follows:
nextflow run BU-ISCIII/nextflow-scif -profile standard
Pipeline arguments:
--reads Path to input data (must be surrounded with quotes).
--genome Path to reference genome.
--outdir Output dir.
--help show this message.
-profile Hardware config to use. standard/docker/singularity. Default: standard.
""".stripIndent()
    log.info usage
}
// Pipeline version. Deliberately assigned without `def`: that makes it a
// script-level binding variable, which helpMessage() reads for its banner.
version = '0.1'

// --help: print the usage text and terminate with exit status 0.
params.help = false
if (params.help) {
    helpMessage()
    exit(0)
}
// Default parameters; each one can be overridden on the command line
// (--reads, --genome, --outdir).
params.reads = "$baseDir/data/samples/*.fastq"
params.genome = "$baseDir/data/genome.fa"
params.outdir = 'results'

/*
 * The reference genome file.
 * Abort immediately with an explicit message when the path does not exist,
 * instead of letting the indexing step fail later on a missing input.
 */
genome_file = file(params.genome)
if (!genome_file.exists()) {
    exit 1, "Cannot find reference genome: ${params.genome}"
}

/*
 * Create the `reads` channel. Size 1 for single-end. Size 2 for paired-end.
 * This pipeline is fixed to size 1, i.e. one file per sample.
 */
Channel
    .fromFilePairs(params.reads, size: 1)
    .ifEmpty { error "Cannot find any reads matching: ${params.reads}" }
    .set { reads }
// Header log info
log.info "========================================================="
log.info " BU-ISCIII/nextflow-scif : basic nf workflow"
log.info "========================================================="

// Run summary: one "label: value" line per entry, labels padded to 21 characters,
// printed in the order the entries are listed here.
def summary = [
    'Reads'           : params.reads,
    'Reference genome': params.genome,
    'Results dir'     : params.outdir
]
def summaryLines = summary.collect { label, value -> "${label.padRight(21)}: $value" }
log.info summaryLines.join("\n")
log.info "===================================="

// Possible software version and profile checks
// (e.g. check whether the standard profile is being used on an HPC server)
// if().....
/*
 * Step 1. Builds the genome index required by the mapping process
 *
 * Input : the reference FASTA (`genome_file`).
 * Output: every file in the task directory whose name starts with the FASTA
 *         name, sent to `bwa_index`.
 */
process makeBWAindex {
    tag "${fasta}"

    input:
    file fasta from genome_file

    output:
    // The glob matches the staged FASTA itself as well as the index files
    // created with the FASTA name as prefix.
    file "${fasta}*" into bwa_index

    script:
    // The former `mkdir BWAIndex` was dropped: nothing wrote to that directory
    // and the output glob above never matched it.
    """
bwa index -a bwtsw $fasta
"""
}
/*
 * Step 2. Maps the reads of each sample against the indexed genome with
 * `bwa mem` and converts the alignments to BAM with `samtools view`.
 *
 * Input : (sample name, read file) from `reads`, the index files from
 *         `bwa_index`, and the reference FASTA.
 * Output: one BAM file per sample, sent to `bwa_bam`.
 */
process mapping {
    tag "$name"

    input:
    set val(name), file(reads) from reads
    // Not referenced by name in the script; declared so the index files are
    // staged into the task directory alongside the FASTA.
    file index from bwa_index
    file fasta from genome_file

    output:
    file '*.bam' into bwa_bam

    script:
    // Output name = first read file minus its FASTQ extension.
    // Handles .fastq and .fq, optionally gzipped, so inputs such as
    // sample.fastq.gz produce sample.bam instead of sample.fastq.gz.bam.
    prefix = reads[0].toString() - ~/(\.fastq|\.fq)(\.gz)?$/
    """
bwa mem -M $fasta $reads | samtools view -bS - > ${prefix}.bam
"""
}
/*
 * Step 3. Sorts each BAM, indexes the sorted file and collects `samtools stats`.
 *
 * Input : one BAM per task from `bwa_bam`.
 * Output: sorted BAM, its .bai index and a stats text file, each on its own
 *         channel; results are copied to <outdir>/bwa.
 */
process samtools {
    tag "${bam.baseName}"
    publishDir "${params.outdir}/bwa", mode: 'copy'

    input:
    file bam from bwa_bam

    output:
    file '*.sorted.bam' into bam_for_bcftools
    file '*.sorted.bam.bai' into bai_for_bcftools
    file '*.stats.txt' into samtools_stats

    script:
    // Base name of the incoming BAM (file name without its last extension),
    // reused for every file this task writes.
    def sample = bam.baseName
    """
samtools sort $bam -o ${sample}.sorted.bam -T ${sample}.sorted
samtools index ${sample}.sorted.bam
samtools stats ${sample}.sorted.bam > ${sample}.stats.txt
"""
}
/*
 * Step 4. Variant calling: `samtools mpileup` piped into `bcftools call`.
 *
 * Input : sorted BAM and its index (taken from two separate channels) plus
 *         the reference FASTA.
 * Output: one VCF per task, sent to `vcf_file` and copied to <outdir>/vcf.
 *
 * NOTE(review): newer samtools releases moved pileup-to-BCF generation to
 * `bcftools mpileup`; confirm the samtools version in use still accepts
 * `mpileup -g`.
 */
process variantCalling {
    tag "${prefix}"
    publishDir "${params.outdir}/vcf", mode: 'copy'

    input:
    file bam_sorted from bam_for_bcftools
    // Not referenced in the command line; declared so the index is staged next to the BAM.
    file bai_sorted from bai_for_bcftools
    file genome from genome_file

    output:
    file "*.vcf" into vcf_file

    script:
    // Strip a trailing ".sorted.bam" (either part optional) to get the VCF name.
    // Left without `def` on purpose: the `tag` directive above reads `prefix`.
    prefix = bam_sorted[0].toString().replaceFirst(/(\.sorted)?(\.bam)?$/, '')
    """
samtools mpileup -g -f ${genome} ${bam_sorted} | bcftools call -mv - > ${prefix}.vcf
"""
}
// Completion handler: prints a one-line status based on workflow.success.
workflow.onComplete {
    def outcome = workflow.success ? "Done!" : "Oops .. something went wrong"
    println outcome
    // E-mail and html reporting configuration.
}