Skip to content

Commit

Permalink
Merge branch 'CW-2493' into 'dev'
Browse files Browse the repository at this point in the history
make output more practical [CW-2493]

See merge request epi2melabs/workflows/wf-amplicon!15
  • Loading branch information
julibeg committed Aug 3, 2023
2 parents 4cc236c + ad19b20 commit 94d0133
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 5 deletions.
5 changes: 3 additions & 2 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ include:
variables:
NF_WORKFLOW_OPTS:
"--fastq test_data/fastq --reference test_data/reference.fasta \
--threads 2"
--threads 2 --combine_results"
CI_FLAVOUR: "new"

docker-run:
Expand All @@ -30,7 +30,7 @@ docker-run:
variables:
NF_WORKFLOW_OPTS:
"--fastq test_data/fastq --reference test_data/reference.fasta \
--threads 2 --reads_downsampling_size 2000"
--threads 2 --reads_downsampling_size 2000 --combine_results"
NF_PROCESS_FILES: >
main.nf
modules/local/pipeline.nf
Expand All @@ -44,6 +44,7 @@ docker-run:
--fastq test_data/fastq
--reference test_data/reference.fasta
--min_read_qual 20
--combine_results
NF_PROCESS_FILES: >
main.nf
modules/local/pipeline.nf
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.3.0]
### Changed
- VCF files now use the sample alias as sample name instead of `SAMPLE`.
- The workflow now also publishes the reference FASTA file with sanitized sequence headers (`:`, `*`, and whitespace replaced with `_`) alongside the other results. This sanitized file is used internally by the workflow because some tools do not tolerate these symbols in sequence IDs.

### Added
- Parameter `--combine_results` to also output merged BAM and VCF files.

## [v0.2.3]
### Fixed
Expand Down
8 changes: 8 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,21 @@ workflow pipeline {
| mix(
software_versions | map { [it, null] },
workflow_params | map { [it, null] },
variantCallingPipeline.out.sanitized_ref | map { [it, null] },
variantCallingPipeline.out.variants
| map { meta, vcf -> [vcf, "$meta.alias/variants"] },
variantCallingPipeline.out.mapped
| map { meta, bam -> [bam, "$meta.alias/alignments"] },
variantCallingPipeline.out.consensus
| map { meta, cons -> [cons, "$meta.alias/consensus"] },
)
if (params.combine_results) {
ch_to_publish = ch_to_publish
| mix(
variantCallingPipeline.out.combined_vcfs | map { [it, null] },
variantCallingPipeline.out.combined_bams | map { [it, null] },
)
}

makeReport(
ch_results_for_report | collect,
Expand Down
51 changes: 49 additions & 2 deletions modules/local/pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ process alignReads {
script:
"""
minimap2 -t $task.cpus -ax map-ont reference.fasta reads.fastq.gz \
-R '@RG\\tID:$meta.alias\\tSM:$meta.alias' \
| samtools sort -@ $task.cpus -o aligned.sorted.bam -
samtools index aligned.sorted.bam
Expand Down Expand Up @@ -129,8 +130,9 @@ process medakaVariant {
medaka tools annotate --dpsp medaka.vcf reference.fasta input.bam \
medaka.annotated.unfiltered.vcf
# filter variants
bcftools filter medaka.annotated.unfiltered.vcf \
# use the sample alias as sample name in the VCF and filter variants
bcftools reheader medaka.annotated.unfiltered.vcf -s <(echo '$meta.alias') \
| bcftools filter \
-e 'INFO/DP < $min_coverage' \
-s LOW_DEPTH \
-Oz -o medaka.annotated.vcf.gz
Expand All @@ -143,6 +145,35 @@ process medakaVariant {
"""
}

// Combine the per-sample VCF files into a single multi-sample `combined.vcf.gz`.
//
// Input:  all per-sample VCFs, staged into `VCFs/` as `file*.vcf.gz`.
// Output: `combined.vcf.gz`.
process mergeVCFs {
    label "medaka"
    cpus 1
    input: path "VCFs/file*.vcf.gz"
    output: path "combined.vcf.gz"
    script:
    """
    # index every staged VCF before merging
    (
        cd VCFs
        ls | xargs -n1 bcftools index
    )
    # `bcftools merge` refuses to run on fewer than two files. When only one sample
    # was analysed, the "combined" VCF is simply that sample's VCF.
    n_vcfs=\$(ls VCFs/file*.vcf.gz | wc -l)
    if [ "\$n_vcfs" -eq 1 ]; then
        cp VCFs/file*.vcf.gz combined.vcf.gz
    else
        bcftools merge VCFs/file*.vcf.gz -Oz -o combined.vcf.gz
    fi
    """
}

// Merge the per-sample alignments into one `combined.bam`.
//
// Inputs:
//   1. the per-sample BAM files, staged into `BAMs/`
//   2. the corresponding BAM index files, staged into `indices/`
// Output: `combined.bam`.
process mergeBAMs {
    label "wfamplicon"
    cpus 1

    input:
        path "BAMs/file*.bam"
        path "indices/file*.bam.bai"

    output:
        path "combined.bam"

    // samtools flags (per the samtools-merge manual; confirm against the bundled version):
    //   -p  on @PG ID collisions keep the ID from the first file
    //   -X  index files are passed explicitly after the BAM files
    //   -o  name of the output file
    script:
    """
    samtools merge BAMs/* indices/* -pXo combined.bam
    """
}


// workflow module
workflow pipeline {
take:
Expand Down Expand Up @@ -196,10 +227,26 @@ workflow pipeline {
mosdepth.out
| groupTuple
| concatMosdepthResultFiles

// combine per-sample BAM and VCF files if requested
combined_vcfs = null
combined_bams = null
if (params.combine_results) {
combined_vcfs = mergeVCFs(
medakaVariant.out.filtered.collect { meta, vcf -> vcf }
)
combined_bams = mergeBAMs(
alignReads.out.collect { meta, bam, bai -> bam },
alignReads.out.collect { meta, bam, bai -> bai }
)
}
emit:
sanitized_ref = ref
mapped = alignReads.out | map { meta, bam, bai -> [meta, bam] }
mapping_stats = bamstats.out
depth = concatMosdepthResultFiles.out
variants = medakaVariant.out.filtered
consensus = medakaVariant.out.consensus
combined_vcfs
combined_bams
}
3 changes: 2 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ params {
sample = null
sample_sheet = null
analyse_unclassified = false
combine_results = false

// filtering + downsampling args
min_read_length = 300
Expand Down Expand Up @@ -65,7 +66,7 @@ manifest {
description = 'Amplicon workflow'
mainScript = 'main.nf'
nextflowVersion = '>=23.04.2'
version = 'v0.2.3'
version = 'v0.3.0'
}

epi2melabs {
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@
"default": "output",
"format": "directory-path",
"description": "Directory for output of all workflow results."
},
"combine_results": {
"type": "boolean",
"default": false,
"description": "Whether to merge per-sample results into a single BAM / VCF file.",
"help_text": "By default, results are grouped per sample. With this option, an additional BAM file and an additional VCF file are produced, which contain the alignments / variants for all samples and amplicons."
}
}
},
Expand Down

0 comments on commit 94d0133

Please sign in to comment.