-
Notifications
You must be signed in to change notification settings - Fork 0
/
04_prediction.nf
80 lines (58 loc) · 2.46 KB
/
04_prediction.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env nextflow
// Group-level feature tables. Each one is reduced with `first()` to a
// single-item channel (suffix `_V`) that is handed to AnnotateSaturation.
CLUSTL_GROUP = Channel
    .fromPath("${OUTPUT}/features_group/clustl.tsv.gz")
CLUSTL_GROUP_V = CLUSTL_GROUP.first()

HOTMAPS_GROUP = Channel
    .fromPath("${OUTPUT}/features_group/hotmaps.tsv.gz")
HOTMAPS_GROUP_V = HOTMAPS_GROUP.first()

SMREGIONS_GROUP = Channel
    .fromPath("${OUTPUT}/features_group/smregions.tsv.gz")
SMREGIONS_GROUP_V = SMREGIONS_GROUP.first()

// One item per cross-validation data file found under
// ${OUTPUT}/splitcv_meta/<directory>/<name>.cvdata.pickle.gz
SPLITCV_OUT = Channel
    .fromPath("${OUTPUT}/splitcv_meta/*/*.cvdata.pickle.gz")

// Turn every file into a [gene, ttype] pair: the gene is the file-name part
// before the first dot, the tumor type is the name of the parent directory.
GENE_TTYPE_OUT = SPLITCV_OUT.map { cvdata ->
    def geneName = cvdata.baseName.split('\\.')[0]
    def ttypeName = cvdata.parent.baseName
    [geneName, ttypeName]
}
// Annotates the saturation mutations of one gene for one tumor type.
//
// Inputs:
//   gene, ttype     - pair taken from GENE_TTYPE_OUT
//   groupCLUSTL     - group-level CLUSTL feature table
//   groupHotMAPS    - group-level HotMAPS feature table
//   groupSMRegions  - group-level SMRegions feature table
// Output:
//   [gene, ttype, <gene>.<ttype>.annotated.tsv.gz] into ANNOTATION_SATURATION;
//   the file is also copied to ${OUTPUT}/saturation/annotation.
process AnnotateSaturation {
    tag "Annotate saturation ${gene} ${ttype}"
    label "boostdm"
    publishDir "${OUTPUT}/saturation/annotation", mode: 'copy'

    input:
    tuple val(gene), val(ttype) from GENE_TTYPE_OUT
    path groupCLUSTL from CLUSTL_GROUP_V
    path groupHotMAPS from HOTMAPS_GROUP_V
    path groupSMRegions from SMREGIONS_GROUP_V

    output:
    tuple val(gene), val(ttype), path(output) into ANNOTATION_SATURATION

    script:
    // The VEP file is referenced by its path under VEP_SATURATION; it is not
    // declared as a process input.
    vep = "${VEP_SATURATION}/${gene}.vep.gz"
    // Assigned without `def` so that the `output:` declaration above can
    // resolve the same name.
    output = "${gene}.${ttype}.annotated.tsv.gz"
    // The command previously ended in a dangling `\` and never wrote
    // ${output}, so the declared output file could not exist. The redirect
    // below produces it.
    // NOTE(review): assumes annotations/gene.py prints the annotated table to
    // stdout - confirm against the script's CLI.
    """
    runner.sh annotations/gene.py \
        --gene ${gene} \
        --ttype ${ttype} \
        --mutations ${vep} \
        --clustl-group ${groupCLUSTL} \
        --hotmaps-group ${groupHotMAPS} \
        --smregions-group ${groupSMRegions} \
        > ${output}
    """
}
// Model-selection evaluation data, reduced to its first item so it can be
// passed to PredictSaturation alongside each annotation.
MODEL = Channel
    .fromPath("${OUTPUT}/model_selection/eval_data.pickle.gz")
    .first()
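// For testing only: uncomment the two lines below (and disable AnnotateSaturation)
// to feed PredictSaturation from annotation files already published on disk.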
// SATURATION_OUT = Channel.fromPath("${OUTPUT}/saturation/annotation/*.annotated.tsv.gz")
// ANNOTATION_SATURATION = SATURATION_OUT.map{ it -> [it.baseName.split('\\.')[0], it.baseName.split('\\.')[1], it]}
// Runs the saturation predictions for one annotated gene / tumor-type table.
//
// Inputs:
//   gene, ttype, annotated - tuple taken from ANNOTATION_SATURATION
//   selection              - model-selection file taken from MODEL
// Output:
//   every *.tsv.gz file created by the task, into PREDICTION_SATURATION;
//   also copied to ${OUTPUT}/saturation/prediction.
//   NOTE(review): the output file name is not set here - presumably it is
//   chosen by perform_predictions.py; confirm.
process PredictSaturation {
    tag "Predict saturation ${gene} ${ttype}"
    label "boostdm"
    publishDir "${OUTPUT}/saturation/prediction", mode: 'copy'

    input:
    tuple val(gene), val(ttype), path(annotated) from ANNOTATION_SATURATION
    path selection from MODEL

    output:
    path("*.tsv.gz") into PREDICTION_SATURATION

    script:
    // Both folders are passed by path under OUTPUT rather than staged as inputs.
    def modelsDir = "${OUTPUT}/training_meta"
    def evaluationsDir = "${OUTPUT}/evaluation"
    """
    runner.sh perform_predictions.py \
        --muts ${annotated} \
        --gene ${gene} \
        --tumor-type ${ttype} \
        --models-folder ${modelsDir} \
        --evaluations-folder ${evaluationsDir} \
        --model-selection ${selection} \
        --high-quality-only
    """
}