diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cea3877..1107406f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,26 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 3.0.1 - [2024-06-10] + +### `Added` + +### `Changed` + +- [#625](https://github.com/nf-core/mag/pull/625) - Updated link to geNomad database for downloading (reported by @amizeranschi, fix by @jfy133) + +### `Fixed` + +- [#618](https://github.com/nf-core/mag/pull/618) - Fix CENTRIFUGE mkfifo failures by using a temporary directory inside the work directory instead of /tmp (reported by @skrakau, fix by @jfy133) + +### `Dependencies` + +| Tool | Previous version | New version | +| ---------- | ---------------- | ----------- | +| Centrifuge | 1.0.4_beta | 1.0.4.1 | + +### `Deprecated` + ## 3.0.0 - [2024-05-13] ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index cf530996..14f8ba03 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/mag + This report has been generated by the nf-core/mag analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-mag-methods-description": order: -1000 diff --git a/conf/modules.config b/conf/modules.config index e45cc0c3..a3be8574 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -222,7 +222,16 @@ process { ] } - withName: CENTRIFUGE { + withName: CENTRIFUGE_CENTRIFUGE { + publishDir = [ + path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + } + + withName: CENTRIFUGE_KREPORT { + ext.prefix = { "${meta.id}_kreport" } publishDir = [ path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, mode: params.publish_dir_mode, @@ -239,7 +248,16 @@ process { ] } - withName: KRONA { + withName: KREPORT2KRONA_CENTRIFUGE { + publishDir = [ + path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.txt", + enabled: false + ] + } + + withName: KRONA_KTIMPORTTAXONOMY { publishDir = [ path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/docs/output.md b/docs/output.md index 88aba227..838fc7c5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -170,9 +170,10 @@ More information on the [Centrifuge](https://ccb.jhu.edu/software/centrifuge/) w Output files - `Taxonomy/centrifuge/[sample]/` - - `report.txt`: Tab-delimited result file. See the [centrifuge manual](https://ccb.jhu.edu/software/centrifuge/manual.shtml#centrifuge-classification-output) for information about the fields - - `kreport.txt`: Classification in the Kraken report format. See the [kraken2 manual](https://github.com/DerrickWood/kraken2/wiki/Manual#output-formats) for more details - - `taxonomy.krona.html`: Interactive pie chart produced by [KronaTools](https://github.com/marbl/Krona/wiki) + - `[sample]_kreport.txt`: Classification in the Kraken report format. 
See the [kraken2 manual](https://github.com/DerrickWood/kraken2/wiki/Manual#output-formats) for more details + - `[sample].report.txt`: Tab-delimited result file. See the [centrifuge manual](https://ccb.jhu.edu/software/centrifuge/manual.shtml#centrifuge-classification-output) for information about the fields + - `[sample].results.txt`: Per read taxonomic classification information. See the [centrifuge manual](https://ccb.jhu.edu/software/centrifuge/manual.shtml#centrifuge-classification-output) for more details + - `[sample].html`: Interactive pie chart produced by [KronaTools](https://github.com/marbl/Krona/wiki) diff --git a/modules.json b/modules.json index 5248f5f1..cabd3125 100644 --- a/modules.json +++ b/modules.json @@ -41,6 +41,17 @@ "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] }, + "centrifuge/centrifuge": { + "branch": "master", + "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", + "installed_by": ["modules"] + }, + "centrifuge/kreport": { + "branch": "master", + "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", + "installed_by": ["modules"], + "patch": "modules/nf-core/centrifuge/kreport/centrifuge-kreport.diff" + }, "checkm/lineagewf": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", @@ -141,6 +152,21 @@ "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", "installed_by": ["modules"] }, + "krakentools/kreport2krona": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "krona/kronadb": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "krona/ktimporttaxonomy": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "maxbin2": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", @@ -205,6 +231,11 @@ "branch": "master", "git_sha": 
"911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/centrifuge/centrifuge/environment.yml b/modules/nf-core/centrifuge/centrifuge/environment.yml new file mode 100644 index 00000000..cf34dc0e --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/environment.yml @@ -0,0 +1,7 @@ +name: centrifuge_centrifuge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::centrifuge=1.0.4.1 diff --git a/modules/nf-core/centrifuge/centrifuge/main.nf b/modules/nf-core/centrifuge/centrifuge/main.nf new file mode 100644 index 00000000..d9a5653d --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/main.nf @@ -0,0 +1,91 @@ +process CENTRIFUGE_CENTRIFUGE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4.1--hdcf5f25_1' : + 'biocontainers/centrifuge:1.0.4.1--hdcf5f25_1' }" + + input: + tuple val(meta), path(reads) + path db + val save_unaligned + val save_aligned + + output: + tuple val(meta), path('*report.txt') , emit: report + tuple val(meta), path('*results.txt') , emit: results + tuple val(meta), path('*.{sam,tab}') , optional: true, emit: sam + tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped + tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? 
"-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + def unaligned = '' + def aligned = '' + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : '' + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : '' + } + """ + ## we add "-no-name ._" to ensure silly Mac OSX metafiles files aren't included + db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1.cf\$//'` + + ## make a directory for placing the pipe files in somewhere other than default /tmp + ## otherwise get pipefile name clashes when multiple centrifuge runs on same node + ## use /tmp at the same time + mkdir ./temp + + centrifuge \\ + -x \$db_name \\ + --temp-directory ./temp \\ + -p $task.cpus \\ + $paired \\ + --report-file ${prefix}.report.txt \\ + -S ${prefix}.results.txt \\ + $unaligned \\ + $aligned \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + def unaligned = '' + def aligned = '' + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : '' + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = save_aligned ? 
"--al-conc-gz ${prefix}.mapped.fastq.gz" : '' + } + """ + touch ${prefix}.report.txt + touch ${prefix}.results.txt + touch ${prefix}.sam + echo | gzip -n > ${prefix}.unmapped.fastq.gz + echo | gzip -n > ${prefix}.mapped.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/centrifuge/centrifuge/meta.yml b/modules/nf-core/centrifuge/centrifuge/meta.yml new file mode 100644 index 00000000..a06104e1 --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/meta.yml @@ -0,0 +1,75 @@ +name: centrifuge_centrifuge +description: Classifies metagenomic sequence data +keywords: + - classify + - metagenomics + - fastq + - db +tools: + - centrifuge: + description: Centrifuge is a classifier for metagenomic sequences. + homepage: https://ccb.jhu.edu/software/centrifuge/ + documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml + doi: 10.1101/gr.210641.116 + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - db: + type: directory + description: Path to directory containing centrifuge database files + - save_unaligned: + type: boolean + description: If true unmapped fastq files are saved + - save_aligned: + type: boolean + description: If true mapped fastq files are saved +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - report: + type: file + description: | + File containing a classification summary + pattern: "*.{report.txt}" + - results: + type: file + description: | + File containing classification results + pattern: "*.{results.txt}" + - sam: + type: file + description: | + Optional output file containing read alignments (SAM format )or a table of per-read hit information (TAB)s + pattern: "*.{sam,tab}" + - fastq_unmapped: + type: file + description: Unmapped fastq files + pattern: "*.unmapped.fastq.gz" + - fastq_mapped: + type: file + description: Mapped fastq files + pattern: "*.mapped.fastq.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sofstam" + - "@jfy133" + - "@sateeshperi" +maintainers: + - "@sofstam" + - "@jfy133" + - "@sateeshperi" diff --git a/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test b/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test new file mode 100644 index 00000000..d83b522a --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test @@ -0,0 +1,106 @@ +nextflow_process { + + name "Test Process CENTRIFUGE_CENTRIFUGE" + script "../main.nf" + process "CENTRIFUGE_CENTRIFUGE" + + tag "modules" + tag "modules_nfcore" + tag "centrifuge" + tag "centrifuge/centrifuge" + tag "untar" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ] + """ + } + } + } + + test("sarscov2_fastq_se") { + + when { + process { + """ + input[0] = [ [id: 'test', single_end: true], file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + 
file(process.out.report[0][1]).name, + file(process.out.results[0][1]).name, + file(process.out.fastq_mapped[0][1][0]).name, + file(process.out.fastq_unmapped[0][1][0]).name, + ).match() } + ) + } + + } + + test("sarscov2_fastq_pe") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.results[0][1]).name, + file(process.out.fastq_mapped[0][1][0]).name, + file(process.out.fastq_unmapped[0][1][0]).name, + ).match() } + ) + } + + } + + test("sarscov2_fastq_se_stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id: 'test'], file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test.snap b/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test.snap new file mode 100644 index 00000000..f8a2ef7b --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test.snap @@ -0,0 +1,125 @@ +{ + "sarscov2_fastq_se_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.results.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + 
"test.mapped.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.unmapped.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + "versions.yml:md5,1ce028d9f968eca6df31586fe3b77c84" + ], + "fastq_mapped": [ + [ + { + "id": "test" + }, + "test.mapped.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "fastq_unmapped": [ + [ + { + "id": "test" + }, + "test.unmapped.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "report": [ + [ + { + "id": "test" + }, + "test.report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "results": [ + [ + { + "id": "test" + }, + "test.results.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test" + }, + "test.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1ce028d9f968eca6df31586fe3b77c84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T07:47:36.886757827" + }, + "sarscov2_fastq_se": { + "content": [ + "test.report.txt", + "test.results.txt", + "", + "" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T08:22:31.470316024" + }, + "sarscov2_fastq_pe": { + "content": [ + "test.report.txt", + "test.results.txt", + "test.mapped.fastq.1.gz", + "test.unmapped.fastq.1.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T08:22:48.866073154" + } +} \ No newline at end of file diff --git a/modules/nf-core/centrifuge/centrifuge/tests/tags.yml b/modules/nf-core/centrifuge/centrifuge/tests/tags.yml new file mode 100644 index 00000000..53444cd2 --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/tests/tags.yml @@ -0,0 +1,2 @@ +centrifuge/centrifuge: + - "modules/nf-core/centrifuge/centrifuge/**" diff --git a/modules/nf-core/centrifuge/kreport/environment.yml b/modules/nf-core/centrifuge/kreport/environment.yml new file mode 100644 index 00000000..5c8fb451 
--- /dev/null +++ b/modules/nf-core/centrifuge/kreport/environment.yml @@ -0,0 +1,7 @@ +name: centrifuge_kreport +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::centrifuge=1.0.4.1 diff --git a/modules/nf-core/centrifuge/kreport/main.nf b/modules/nf-core/centrifuge/kreport/main.nf new file mode 100644 index 00000000..25eb7167 --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/main.nf @@ -0,0 +1,45 @@ +process CENTRIFUGE_KREPORT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4.1--hdcf5f25_1' : + 'biocontainers/centrifuge:1.0.4.1--hdcf5f25_1' }" + + input: + tuple val(meta), path(report) + path db + + output: + tuple val(meta), path('*.txt'), emit: kreport + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1.cf\$//'` + centrifuge-kreport -x \$db_name ${report} > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/centrifuge/kreport/meta.yml b/modules/nf-core/centrifuge/kreport/meta.yml new file mode 100644 index 00000000..5641152b --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/meta.yml @@ -0,0 +1,51 @@ +name: "centrifuge_kreport" +description: Creates Kraken-style 
reports from centrifuge out files +keywords: + - classify + - metagenomics + - fastq + - db + - report + - kraken +tools: + - centrifuge: + description: Centrifuge is a classifier for metagenomic sequences. + homepage: https://ccb.jhu.edu/software/centrifuge/ + documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml + doi: 10.1101/gr.210641.116 + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - report: + type: file + description: File containing the centrifuge classification report + pattern: "*.{txt}" + - db: + type: directory + description: Path to directory containing centrifuge database files +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - kreport: + type: file + description: | + File containing kraken-style report from centrifuge + out files. 
+ pattern: "*.{txt}" +authors: + - "@sofstam" + - "@jfy133" +maintainers: + - "@sofstam" + - "@jfy133" diff --git a/modules/nf-core/centrifuge/kreport/tests/main.nf.test b/modules/nf-core/centrifuge/kreport/tests/main.nf.test new file mode 100644 index 00000000..6347bd7c --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/tests/main.nf.test @@ -0,0 +1,81 @@ +// nf-core modules test centrifuge/kreport +nextflow_process { + + name "Test Process CENTRIFUGE_KREPORT" + script "../main.nf" + process "CENTRIFUGE_KREPORT" + + tag "modules" + tag "modules_nfcore" + tag "centrifuge" + tag "centrifuge/centrifuge" + tag "centrifuge/kreport" + tag "untar" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ] + """ + } + } + run("CENTRIFUGE_CENTRIFUGE") { + script "../../../centrifuge/centrifuge/main.nf" + process { + """ + input[0] = [ [id: 'test', single_end: true], file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + """ + } + } + } + + test("sarscov2_fastq_se") { + + when { + process { + """ + input[0] = CENTRIFUGE_CENTRIFUGE.out.results + input[1] = UNTAR.out.untar.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.kreport[0][1]).name, + ).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = CENTRIFUGE_CENTRIFUGE.out.results + input[1] = UNTAR.out.untar.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/centrifuge/kreport/tests/main.nf.test.snap b/modules/nf-core/centrifuge/kreport/tests/main.nf.test.snap new file mode 100644 
index 00000000..4e0aaa79 --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,43c766a19f2edf7e05d1a2a0b1816b13" + ], + "kreport": [ + [ + { + "id": "test", + "single_end": true + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,43c766a19f2edf7e05d1a2a0b1816b13" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T06:18:36.794405448" + }, + "sarscov2_fastq_se": { + "content": [ + "test.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T06:28:20.461891873" + } +} \ No newline at end of file diff --git a/modules/nf-core/centrifuge/kreport/tests/tags.yml b/modules/nf-core/centrifuge/kreport/tests/tags.yml new file mode 100644 index 00000000..a3823d76 --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/tests/tags.yml @@ -0,0 +1,2 @@ +centrifuge/kreport: + - "modules/nf-core/centrifuge/kreport/**" diff --git a/modules/nf-core/krakentools/kreport2krona/environment.yml b/modules/nf-core/krakentools/kreport2krona/environment.yml new file mode 100644 index 00000000..ea49a77c --- /dev/null +++ b/modules/nf-core/krakentools/kreport2krona/environment.yml @@ -0,0 +1,7 @@ +name: krakentools_kreport2krona +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::krakentools=1.2 diff --git a/modules/nf-core/krakentools/kreport2krona/main.nf b/modules/nf-core/krakentools/kreport2krona/main.nf new file mode 100644 index 00000000..f9f27001 --- /dev/null +++ b/modules/nf-core/krakentools/kreport2krona/main.nf @@ -0,0 +1,36 @@ +process KRAKENTOOLS_KREPORT2KRONA { + tag "$meta.id" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. 
Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0': + 'biocontainers/krakentools:1.2--pyh5e36f6f_0' }" + + input: + tuple val(meta), path(kreport) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + kreport2krona.py \\ + -r ${kreport} \\ + -o ${prefix}.txt \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kreport2krona.py: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/krakentools/kreport2krona/meta.yml b/modules/nf-core/krakentools/kreport2krona/meta.yml new file mode 100644 index 00000000..7a5dda4a --- /dev/null +++ b/modules/nf-core/krakentools/kreport2krona/meta.yml @@ -0,0 +1,40 @@ +name: krakentools_kreport2krona +description: Takes a Kraken report file and prints out a krona-compatible TEXT file +keywords: + - kraken + - krona + - metagenomics + - visualization +tools: + - krakentools: + description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs. + homepage: https://github.com/jenniferlu717/KrakenTools + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - kreport: + type: file + description: Kraken report + pattern: "*.{txt,kreport}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - krona: + type: file + description: Krona text-based input file converted from Kraken report + pattern: "*.{txt,krona}" +authors: + - "@MillironX" +maintainers: + - "@MillironX" diff --git a/modules/nf-core/krona/kronadb/environment.yml b/modules/nf-core/krona/kronadb/environment.yml new file mode 100644 index 00000000..1646628f --- /dev/null +++ b/modules/nf-core/krona/kronadb/environment.yml @@ -0,0 +1,7 @@ +name: krona_kronadb +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::krona=2.7.1 diff --git a/modules/nf-core/krona/kronadb/main.nf b/modules/nf-core/krona/kronadb/main.nf new file mode 100644 index 00000000..1d9bf698 --- /dev/null +++ b/modules/nf-core/krona/kronadb/main.nf @@ -0,0 +1,30 @@ +def VERSION='2.7.1' // Version information not provided by tool on CLI + +process KRONA_KRONADB { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/krona:2.7.1--pl526_5' : + 'biocontainers/krona:2.7.1--pl526_5' }" + + output: + path 'taxonomy/taxonomy.tab', emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + ktUpdateTaxonomy.sh \\ + $args \\ + taxonomy/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/krona/kronadb/meta.yml b/modules/nf-core/krona/kronadb/meta.yml new file mode 100644 index 00000000..0d42bb10 --- /dev/null +++ b/modules/nf-core/krona/kronadb/meta.yml @@ -0,0 +1,26 @@ +name: krona_kronadb +description: KronaTools Update Taxonomy downloads a taxonomy database +keywords: + - database + - taxonomy + - krona +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: https://github.com/marbl/Krona/wiki/Installing + doi: 10.1186/1471-2105-12-385 +# There is no input. This module downloads a pre-built taxonomy database for use with Krona Tools. +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: file + description: A TAB separated file that contains a taxonomy database. 
+ pattern: "*.{tab}" +authors: + - "@mjakobs" +maintainers: + - "@mjakobs" diff --git a/modules/nf-core/krona/ktimporttaxonomy/environment.yml b/modules/nf-core/krona/ktimporttaxonomy/environment.yml new file mode 100644 index 00000000..1909e15f --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/environment.yml @@ -0,0 +1,7 @@ +name: krona_ktimporttaxonomy +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::krona=2.8 diff --git a/modules/nf-core/krona/ktimporttaxonomy/main.nf b/modules/nf-core/krona/ktimporttaxonomy/main.nf new file mode 100644 index 00000000..5a9f3ff8 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/main.nf @@ -0,0 +1,41 @@ +process KRONA_KTIMPORTTAXONOMY { + tag "${meta.id}" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krona:2.8--pl5262hdfd78af_2' : + 'biocontainers/krona:2.8--pl5262hdfd78af_2' }" + + input: + tuple val(meta), path(report) + path taxonomy, stageAs: 'taxonomy.tab' + + output: + tuple val(meta), path ('*.html'), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.8' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + TAXONOMY=\$(find -L . 
-name '*.tab' -exec dirname {} \\;) + echo \$TAXONOMY + + ktImportTaxonomy \\ + $args \\ + -o ${prefix}.html \\ + -tax \$TAXONOMY/ \\ + $report + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/krona/ktimporttaxonomy/meta.yml b/modules/nf-core/krona/ktimporttaxonomy/meta.yml new file mode 100644 index 00000000..de548210 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/meta.yml @@ -0,0 +1,45 @@ +name: krona_ktimporttaxonomy +description: KronaTools Import Taxonomy imports taxonomy classifications and produces an interactive Krona plot. +keywords: + - plot + - taxonomy + - interactive + - html + - visualisation + - krona chart +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html + doi: 10.1186/1471-2105-12-385 +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - database: + type: file + description: | + Path to a Krona taxonomy .tab file normally downloaded and generated by + krona/ktUpdateTaxonomy. Custom taxonomy files can have any name, but + must end in `.tab`. + pattern: "*tab" + - report: + type: file + description: "A tab-delimited file with taxonomy IDs and (optionally) query IDs, magnitudes, and scores. Query IDs are taken from column 1, taxonomy IDs from column 2, and scores from column 3. Lines beginning with # will be ignored." + pattern: "*.{tsv}" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - html: + type: file + description: A html file containing an interactive krona plot. 
+ pattern: "*.{html}" +authors: + - "@mjakobs" +maintainers: + - "@mjakobs" diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..0c9cbb10 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,11 @@ +name: untar + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.7 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..8a75bb95 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..a9a2110f --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,46 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..2a7c97bf --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,47 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar") }, + ) + } + + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + ) + } + + } + +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..64550292 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,42 @@ +{ + "test_untar_onlyfiles": { + "content": [ + [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:41.320643" + }, + "test_untar": { + "content": [ + [ + [ + [ + + ], + [ + 
"hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:33.795172" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/nextflow.config b/nextflow.config index 26736896..051be69d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -390,7 +390,7 @@ manifest { description = """Assembly, binning and annotation of metagenomes""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '3.0.0' + version = '3.0.1' doi = '10.1093/nargab/lqac007' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 7f84f338..b2847b0c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -491,11 +491,14 @@ "properties": { "centrifuge_db": { "type": "string", + "format": "file-path", + "exists": true, "description": "Database for taxonomic binning with centrifuge.", "help_text": "Local directory containing `*.cf` files, or a URL or local path to a downloaded compressed tar archive of a Centrifuge database. E.g. ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz." }, "kraken2_db": { "type": "string", + "format": "file-path", "description": "Database for taxonomic binning with kraken2.", "help_text": "Path to a local directory, archive file, or a URL to compressed tar archive that contains at least the three files `hash.k2d`, `opts.k2d` and `taxo.k2d`. E.g. ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken_8GB_202003.tgz." 
}, @@ -585,7 +588,7 @@ "genomad_db": { "type": "string", "description": "Database for virus classification with geNomad", - "help_text": "Must be a directory containing the uncompressed contents from https://portal.nersc.gov/genomad/__data__/genomad_db_v1.1.tar.gz" + "help_text": "Must be a directory containing the uncompressed contents from https://zenodo.org/doi/10.5281/zenodo.6994741 (nf-core/mag tested with v1.1)" } } }, diff --git a/workflows/mag.nf b/workflows/mag.nf index 8aec1ef3..7594b370 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -28,19 +28,26 @@ include { DEPTHS } from '../subworkflows/local/depths' // // MODULE: Installed directly from nf-core/modules // -include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' -include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' -include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' -include { FASTP } from '../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { PRODIGAL } from '../modules/nf-core/prodigal/main' -include { PROKKA } from '../modules/nf-core/prokka/main' -include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' -include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' +include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' +include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' +include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from 
'../modules/nf-core/bbmap/bbnorm/main' +include { FASTP } from '../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' +include { UNTAR as CENTRIFUGEDB_UNTAR } from '../modules/nf-core/untar/main' +include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' +include { CENTRIFUGE_KREPORT } from '../modules/nf-core/centrifuge/kreport/main' +include { KRONA_KRONADB } from '../modules/nf-core/krona/kronadb/main' +include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' +include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' +include { PRODIGAL } from '../modules/nf-core/prodigal/main' +include { PROKKA } from '../modules/nf-core/prokka/main' +include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' +include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' // // MODULE: Local to the pipeline @@ -54,19 +61,14 @@ include { NANOLYSE } from '../modules include { FILTLONG } from '../modules/local/filtlong' include { NANOPLOT as NANOPLOT_RAW } from '../modules/local/nanoplot' include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/local/nanoplot' -include { CENTRIFUGE_DB_PREPARATION } from '../modules/local/centrifuge_db_preparation' -include { CENTRIFUGE } from '../modules/local/centrifuge' include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' include { KRAKEN2 } from '../modules/local/kraken2' -include { KRONA_DB } from '../modules/local/krona_db' -include { KRONA } from '../modules/local/krona' include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from 
'../modules/local/pool_single_reads' include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' include { MEGAHIT } from '../modules/local/megahit' include { SPADES } from '../modules/local/spades' include { SPADESHYBRID } from '../modules/local/spadeshybrid' -include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' include { QUAST } from '../modules/local/quast' include { QUAST_BINS } from '../modules/local/quast_bins' include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' @@ -111,12 +113,6 @@ if (params.gunc_db) { ch_gunc_db = Channel.empty() } -if(params.centrifuge_db){ - ch_centrifuge_db_file = file(params.centrifuge_db, checkIfExists: true) -} else { - ch_centrifuge_db_file = [] -} - if(params.kraken2_db){ ch_kraken2_db_file = file(params.kraken2_db, checkIfExists: true) } else { @@ -414,39 +410,31 @@ workflow MAG { Taxonomic information ================================================================================ */ - if ( !ch_centrifuge_db_file.isEmpty() ) { - if ( ch_centrifuge_db_file.extension in ['gz', 'tgz'] ) { - // Expects to be tar.gz! 
- ch_db_for_centrifuge = CENTRIFUGE_DB_PREPARATION ( ch_centrifuge_db_file ) - .db - .collect() - .map{ - db -> - def db_name = db[0].getBaseName().split('\\.')[0] - [ db_name, db ] - } - } else if ( ch_centrifuge_db_file.isDirectory() ) { - ch_db_for_centrifuge = Channel - .fromPath( "${ch_centrifuge_db_file}/*.cf" ) - .collect() - .map{ - db -> - def db_name = db[0].getBaseName().split('\\.')[0] - [ db_name, db ] - } + + // Centrifuge + if ( !params.centrifuge_db ) { + ch_db_for_centrifuge = Channel.empty() + } else { + if ( file(params.centrifuge_db).isDirectory() ) { + ch_db_for_centrifuge = Channel.of(file(params.centrifuge_db, checkIfExists: true)).first() /* value channel, so the same database directory is reused for every sample (matches the untar branch) */ } else { - ch_db_for_centrifuge = Channel.empty() + ch_db_for_centrifuge = CENTRIFUGEDB_UNTAR ( Channel.of([[id: 'db'], file(params.centrifuge_db, checkIfExists: true)])).untar.map{it[1]}.first() + ch_versions = ch_versions.mix(CENTRIFUGEDB_UNTAR.out.versions.first()) } - } else { - ch_db_for_centrifuge = Channel.empty() } - CENTRIFUGE ( + CENTRIFUGE_CENTRIFUGE ( ch_short_reads, - ch_db_for_centrifuge + ch_db_for_centrifuge, + false, + false ) - ch_versions = ch_versions.mix(CENTRIFUGE.out.versions.first()) + ch_versions = ch_versions.mix(CENTRIFUGE_CENTRIFUGE.out.versions.first()) + CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.results, ch_db_for_centrifuge ) + ch_versions = ch_versions.mix(CENTRIFUGE_KREPORT.out.versions.first()) + + // Kraken2 if ( !ch_kraken2_db_file.isEmpty() ) { if ( ch_kraken2_db_file.extension in ['gz', 'tgz'] ) { // Expects to be tar.gz! @@ -481,19 +469,32 @@ workflow MAG { if (params.krona_db){ ch_krona_db = ch_krona_db_file } else { - KRONA_DB () - ch_krona_db = KRONA_DB.out.db + KRONA_KRONADB () + ch_krona_db = KRONA_KRONADB.out.db + ch_versions = ch_versions.mix(KRONA_KRONADB.out.versions) } - ch_tax_classifications = CENTRIFUGE.out.results_for_krona.mix(KRAKEN2.out.results_for_krona) - .
map { classifier, meta, report -> - def meta_new = meta + [classifier: classifier] - [ meta_new, report ] - } - KRONA ( + + if ( params.centrifuge_db ) { + ch_centrifuge_for_krona = KREPORT2KRONA_CENTRIFUGE ( CENTRIFUGE_KREPORT.out.kreport ).txt.map{ meta, files -> ['centrifuge', meta, files] } + ch_versions = ch_versions.mix(KREPORT2KRONA_CENTRIFUGE.out.versions.first()) + } else { + ch_centrifuge_for_krona = Channel.empty() + } + + // Join together for Krona + ch_tax_classifications = ch_centrifuge_for_krona + .mix(KRAKEN2.out.results_for_krona) + .map { classifier, meta, report -> + def meta_new = meta + [classifier: classifier] + [ meta_new, report ] + } + + KRONA_KTIMPORTTAXONOMY ( ch_tax_classifications, ch_krona_db ) - ch_versions = ch_versions.mix(KRONA.out.versions.first()) + ch_versions = ch_versions.mix(KRONA_KTIMPORTTAXONOMY.out.versions.first()) + } /* @@ -1064,7 +1065,7 @@ workflow MAG { } - ch_multiqc_files = ch_multiqc_files.mix(CENTRIFUGE.out.kreport.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(CENTRIFUGE_KREPORT.out.kreport.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2.out.report.collect{it[1]}.ifEmpty([])) if (!params.skip_quast){