forked from NCBI-Hackathons/SPeW
-
Notifications
You must be signed in to change notification settings - Fork 1
/
SPEW.nf
124 lines (105 loc) · 3.29 KB
/
SPEW.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/* This is just a toy example */
/*
* Copyright (c) 2013-2017, Centre for Genomic Regulation (CRG) and the authors.
*
* This file is part of 'RNA-Toy'.
*
* RNA-Toy is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* RNA-Toy is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with RNA-Toy. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Proof of concept Nextflow based RNA-Seq pipeline
*
* Authors:
* Paolo Di Tommaso <[email protected]>
* Emilio Palumbo <[email protected]>
*/
/*
 * Default pipeline parameters: the reference genome, its annotation, the
 * read-pair glob and the output directory. Each value can be overridden
 * by using the corresponding command line option.
 */
params.genome = "${baseDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
params.annot  = "${baseDir}/data/ggal/ggal_1_48850000_49020000.bed.gff"
params.reads  = "${baseDir}/data/ggal/*_{1,2}.fq"
params.outdir = 'results'
// Log a start-up banner listing the effective value of every parameter.
// The banner lines share a common indent which stripIndent() removes.
log.info(
    """\
    R N A T O Y P I P E L I N E
    =============================
    genome: ${params.genome}
    annot : ${params.annot}
    reads : ${params.reads}
    outdir: ${params.outdir}
    """.stripIndent()
)
/*
 * File handles for the reference genome and its annotation, resolved from
 * the `genome` and `annot` parameters
 */
annotation_file = file( params.annot )
genome_file     = file( params.genome )
/*
 * Create the `read_pairs` channel from the files matching `params.reads`.
 * The mapping step consumes each emitted item as a pair ID together with
 * the read files of that pair. The run is aborted with an error when the
 * pattern matches nothing.
 */
read_pairs = Channel
    .fromFilePairs( params.reads )
    .ifEmpty { error( "Cannot find any reads matching: ${params.reads}" ) }
/*
 * Step 1. Runs `bowtie2-build` on the reference genome to produce the
 * `genome.index*` files required by the mapping process
 */
process buildIndex {
    // Label the task with the genome file's base name.
    tag "${genome_file.baseName}"

    input:
    file genome from genome_file

    output:
    // Every file sharing the `genome.index` prefix is emitted downstream.
    file 'genome.index*' into genome_index

    script:
    """
    bowtie2-build --threads $task.cpus $genome genome.index
    """
}
/*
 * Step 2. Maps each read-pair against the genome index with `tophat2`
 * and emits the resulting `accepted_hits.bam` keyed by pair ID
 */
process mapping {
    tag "${pair_id}"

    input:
    // `genome` is staged into the task directory; the command below does
    // not reference it by name.
    file genome from genome_file
    file annot from annotation_file
    // Index files are addressed in the command by their `genome.index` prefix.
    file index from genome_index
    set pair_id, file(reads) from read_pairs

    output:
    set pair_id, file('accepted_hits.bam') into bam

    script:
    """
    tophat2 -p $task.cpus --GTF ${annot} genome.index ${reads}
    mv tophat_out/accepted_hits.bam .
    """
}
/*
 * Step 3. Assembles the transcripts of each mapped sample with the
 * "cufflinks" tool and copies the per-sample GTF into the output directory
 */
process makeTranscript {
    tag "${pair_id}"
    publishDir params.outdir, mode: 'copy'

    input:
    file annot from annotation_file
    set pair_id, file(bam_file) from bam

    output:
    set pair_id, file('transcript_*.gtf') into transcripts

    script:
    // The cufflinks output is renamed so each sample's file carries its pair ID.
    """
    cufflinks --no-update-check -q -p ${task.cpus} -G ${annot} ${bam_file}
    mv transcripts.gtf transcript_${pair_id}.gtf
    """
}
// Completion handler: print a one-line status message once the run ends.
workflow.onComplete {
    if( workflow.success )
        println "Done!"
    else
        println "Oops .. something went wrong"
}