From 90dffde9c556aa7d0d9a476adca13370e9821abb Mon Sep 17 00:00:00 2001 From: Eugenia Fontecha Date: Wed, 20 Mar 2024 16:26:46 +0000 Subject: [PATCH 01/13] remove anyOf from nextflow schema --- nextflow_schema.json | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7124fefd..aee0a340 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -37,16 +37,9 @@ "description": "Path to the reference panel or csv file with the list of panels", "fa_icon": "far fa-file-code", "format": "file-path", - "anyOf": [ - { - "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?$" - }, - { - "pattern": "^\\S+\\.(csv|tsv|txt)$", - "mimetype": "text/csv", - "schema": "assets/schema_input_panel.json" - } - ] + "pattern": "^\\S+\\.(csv|tsv|txt)$", + "mimetype": "text/csv", + "schema": "assets/schema_input_panel.json" }, "panel_index": { "type": "string", From e569fcba60b133eac1713dc9b6a8890bf1baec02 Mon Sep 17 00:00:00 2001 From: Eugenia Fontecha Date: Wed, 20 Mar 2024 16:28:00 +0000 Subject: [PATCH 02/13] add optional .gz extension to panel schema --- assets/schema_input_panel.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/schema_input_panel.json b/assets/schema_input_panel.json index 49593176..46ce0f7c 100644 --- a/assets/schema_input_panel.json +++ b/assets/schema_input_panel.json @@ -15,22 +15,22 @@ }, "vcf": { "type": "string", - "pattern": "^\\S+\\.vcf$", + "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?$", "errorMessage": "Panel vcf file must be provided, cannot contain spaces and must have extension '.vcf'" }, "index": { "type": "string", - "pattern": "^\\S+\\.vcf\\.(tbi|csi)$", + "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?\\.(tbi|csi)$", "errorMessage": "Panel vcf index file must be provided, cannot contain spaces and must have extension '.vcf.tbi' or '.vcf.csi'" }, "sites": { "type": "string", - "pattern": "^\\S+\\.sites$", + "pattern": "^\\S+\\.sites(\\.bcf)?$", "errorMessage": "Panel 
sites file must be provided, cannot contain spaces and must have extension '.sites'" }, "tsv": { "type": "string", - "pattern": "^\\S+\\.tsv$", + "pattern": "^\\S+\\.tsv(\\.gz)?$", "errorMessage": "Panel tsv file must be provided, cannot contain spaces and must have extension '.tsv'" }, "legend": { From 926372779c5f159a725b50c2705bde5cab22248c Mon Sep 17 00:00:00 2001 From: Eugenia Fontecha Date: Wed, 20 Mar 2024 16:28:44 +0000 Subject: [PATCH 03/13] change panel param to a csv --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 297d318a..1f046073 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,7 +24,7 @@ params { // Genome references fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" - panel = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.bcf" + panel = "${projectDir}/tests/csv/panel.csv" phased = true // Impute parameters From ff07f9aaeb616b395f45688def246236eea7db95 Mon Sep 17 00:00:00 2001 From: Eugenia Fontecha Date: Wed, 20 Mar 2024 16:30:12 +0000 Subject: [PATCH 04/13] update ch_panel creation to work with csv file --- .../utils_nfcore_phaseimpute_pipeline/main.nf | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index a443a2a5..164b78d5 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -126,17 +126,12 @@ workflow PIPELINE_INITIALISATION { // Create channel for panel // if (params.panel) { - if (params.panel.endsWith("csv|tsv|txt")) { - print("Panel file provided as input is a samplesheet") - ch_panel = Channel.fromSamplesheet("panel") - } else { - print("Panel file provided as input is 
a variant file") - ch_panel = Channel.of([ - [id: file(params.panel, checkIfExists:true).getBaseName()], - file(params.panel, checkIfExists:true), - params.panel_index ? file(params.panel_index, checkIfExists:true) : file(params.panel + ".csi", checkIfExists:true) - ]) - } + ch_panel = Channel + .fromSamplesheet("panel") + .map { + meta,vcf,index,sites,tsv,legend,phased -> + [ meta, vcf, index ] + } } // From ae6a8739373aa06ca9806357c5a6a6da11f075c5 Mon Sep 17 00:00:00 2001 From: Eugenia Fontecha Date: Wed, 20 Mar 2024 16:41:25 +0000 Subject: [PATCH 05/13] update CHANGELOG file --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe003c06..a44cd749 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository - [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed outdir from test config files +- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv ### `Fixed` From a608ac99ce586caf72ee1bb8e4b414d95ca77853 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 21 Mar 2024 12:19:19 +0100 Subject: [PATCH 06/13] Update tools --- modules.json | 12 +- modules/nf-core/glimpse/chunk/meta.yml | 2 +- modules/nf-core/glimpse/ligate/meta.yml | 2 +- .../nf-core/glimpse/ligate/tests/main.nf.test | 21 ++-- modules/nf-core/glimpse/phase/main.nf | 4 +- modules/nf-core/glimpse/phase/meta.yml | 2 +- .../nf-core/glimpse/phase/tests/main.nf.test | 14 +-- modules/nf-core/glimpse2/ligate/meta.yml | 98 ++++++++-------- modules/nf-core/shapeit5/ligate/meta.yml | 4 +- .../nf-core/vcf_impute_glimpse/main.nf | 4 +- .../nf-core/vcf_impute_glimpse/meta.yml | 14 +-- .../vcf_impute_glimpse/tests/main.nf.test | 47 +++----- .../tests/main.nf.test.snap | 108 ++++++++---------- 
.../nf-core/vcf_impute_glimpse/tests/tags.yml | 2 +- .../nf-core/vcf_phase_shapeit5/main.nf | 3 +- 15 files changed, 158 insertions(+), 179 deletions(-) diff --git a/modules.json b/modules.json index 63f743dd..77bda261 100644 --- a/modules.json +++ b/modules.json @@ -50,17 +50,17 @@ }, "glimpse/chunk": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "7e56daae390ff896b292ddc70823447683a79936", "installed_by": ["vcf_impute_glimpse"] }, "glimpse/ligate": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "7e56daae390ff896b292ddc70823447683a79936", "installed_by": ["vcf_impute_glimpse"] }, "glimpse/phase": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "7e56daae390ff896b292ddc70823447683a79936", "installed_by": ["vcf_impute_glimpse"] }, "glimpse2/chunk": { @@ -112,7 +112,7 @@ }, "shapeit5/ligate": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", "installed_by": ["vcf_phase_shapeit5"] }, "shapeit5/phasecommon": { @@ -156,12 +156,12 @@ }, "vcf_impute_glimpse": { "branch": "master", - "git_sha": "8dab3bf2aaa912730419080e96d2a7d98911db48", + "git_sha": "7e56daae390ff896b292ddc70823447683a79936", "installed_by": ["subworkflows"] }, "vcf_phase_shapeit5": { "branch": "master", - "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/glimpse/chunk/meta.yml b/modules/nf-core/glimpse/chunk/meta.yml index 65fa6218..e500d9e9 100644 --- a/modules/nf-core/glimpse/chunk/meta.yml +++ b/modules/nf-core/glimpse/chunk/meta.yml @@ -11,7 +11,7 @@ tools: documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" tool_dev_url: "https://github.com/odelaneau/GLIMPSE" doi: "10.1038/s41588-020-00756-0" - licence: "['MIT']" + licence: ["MIT"] input: - 
meta: type: map diff --git a/modules/nf-core/glimpse/ligate/meta.yml b/modules/nf-core/glimpse/ligate/meta.yml index da6066ad..c3b1485c 100644 --- a/modules/nf-core/glimpse/ligate/meta.yml +++ b/modules/nf-core/glimpse/ligate/meta.yml @@ -12,7 +12,7 @@ tools: documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" tool_dev_url: "https://github.com/odelaneau/GLIMPSE" doi: "10.1038/s41588-020-00756-0" - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map diff --git a/modules/nf-core/glimpse/ligate/tests/main.nf.test b/modules/nf-core/glimpse/ligate/tests/main.nf.test index e86ff231..7289fc91 100644 --- a/modules/nf-core/glimpse/ligate/tests/main.nf.test +++ b/modules/nf-core/glimpse/ligate/tests/main.nf.test @@ -3,10 +3,13 @@ nextflow_process { name "Test Process GLIMPSE_LIGATE" script "../main.nf" process "GLIMPSE_LIGATE" - tag "glimpse" - tag "glimpse/ligate" + tag "modules_nfcore" tag "modules" + tag "glimpse" + tag "glimpse/ligate" + tag "glimpse/phase" + tag "bcftools/index" test("test_glimpse_ligate") { setup { @@ -21,15 +24,15 @@ nextflow_process { ]) input_vcf = Channel.of([ [ id:'input'], // meta map - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) ref_panel = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + file(params.modules_testdata_base_path + 
"delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) ]) ch_map = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), ]) input[0] = input_vcf @@ -44,7 +47,7 @@ nextflow_process { script "../../../bcftools/index/main.nf" process { """ - input[0] = GLIMPSE_PHASE.out.phased_variant + input[0] = GLIMPSE_PHASE.out.phased_variants """ } } @@ -53,7 +56,7 @@ nextflow_process { when { process { """ - input[0] = GLIMPSE_PHASE.out.phased_variant + input[0] = GLIMPSE_PHASE.out.phased_variants | groupTuple() | join (BCFTOOLS_INDEX.out.csi.groupTuple()) """ diff --git a/modules/nf-core/glimpse/phase/main.nf b/modules/nf-core/glimpse/phase/main.nf index a6f39d15..41004e60 100644 --- a/modules/nf-core/glimpse/phase/main.nf +++ b/modules/nf-core/glimpse/phase/main.nf @@ -4,14 +4,14 @@ process GLIMPSE_PHASE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/glimpse-bio:1.1.1--h2ce4488_2': + 'https://depot.galaxyproject.org/singularity/glimpse-bio:1.1.1--hce55b13_1': 'biocontainers/glimpse-bio:1.1.1--hce55b13_1' }" input: tuple val(meta) , path(input), path(input_index), path(samples_file), val(input_region), val(output_region), path(reference), path(reference_index), path(map) output: - tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: phased_variant + tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: phased_variants path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/glimpse/phase/meta.yml b/modules/nf-core/glimpse/phase/meta.yml index 696616d3..862033b7 100644 --- a/modules/nf-core/glimpse/phase/meta.yml +++ b/modules/nf-core/glimpse/phase/meta.yml @@ -12,7 +12,7 @@ tools: documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" tool_dev_url: "https://github.com/odelaneau/GLIMPSE" doi: "10.1038/s41588-020-00756-0" - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map diff --git a/modules/nf-core/glimpse/phase/tests/main.nf.test b/modules/nf-core/glimpse/phase/tests/main.nf.test index 0009387f..5c92cb1f 100644 --- a/modules/nf-core/glimpse/phase/tests/main.nf.test +++ b/modules/nf-core/glimpse/phase/tests/main.nf.test @@ -22,15 +22,15 @@ nextflow_process { ]) input_vcf = Channel.of([ [ id:'input'], // meta map - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) ref_panel = Channel.of([ - 
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) ]) ch_map = Channel.of([ - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), ]) input[0] = input_vcf @@ -47,7 +47,7 @@ nextflow_process { then { String targetFileName = "input_present_chr21_16650000-16800000.vcf.gz" - File selectedFile = process.out.phased_variant.stream() + File selectedFile = process.out.phased_variants.stream() .filter(vector -> vector.size() > 1) .map(vector -> new File(vector.get(1).toString())) .filter(file -> file.getName().equals(targetFileName)) @@ -58,7 +58,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.phased_variant.size() == 4}, + { assert process.out.phased_variants.size() == 4}, { assert snapshot(lines).match("imputed") } ) } diff --git a/modules/nf-core/glimpse2/ligate/meta.yml b/modules/nf-core/glimpse2/ligate/meta.yml index ffedb344..2da33567 100644 --- a/modules/nf-core/glimpse2/ligate/meta.yml +++ b/modules/nf-core/glimpse2/ligate/meta.yml @@ -1,49 +1,49 @@ -name: "glimpse2_ligate" -description: | - Ligatation of multiple phased BCF/VCF files into a single whole chromosome file. - GLIMPSE2 is run in chunks that are ligated into chromosome-wide files maintaining the phasing. 
-keywords: - - ligate - - low-coverage - - glimpse - - imputation -tools: - - "glimpse2": - description: "GLIMPSE2 is a phasing and imputation method for large-scale low-coverage sequencing studies." - homepage: "https://odelaneau.github.io/GLIMPSE" - documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" - tool_dev_url: "https://github.com/odelaneau/GLIMPSE" - doi: "10.1038/s41588-020-00756-0" - licence: "['MIT']" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_list: - type: file - description: VCF/BCF file containing genotype probabilities (GP field). - pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" - - input_index: - type: file - description: Index file of the input VCF/BCF file containing genotype likelihoods. - pattern: "*.{csi,tbi}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - merged_variants: - type: file - description: Output ligated (phased) file in VCF/BCF format. - pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" -authors: - - "@louislenezet" -maintainers: - - "@louislenezet" +name: "glimpse2_ligate" +description: | + Ligatation of multiple phased BCF/VCF files into a single whole chromosome file. + GLIMPSE2 is run in chunks that are ligated into chromosome-wide files maintaining the phasing. +keywords: + - ligate + - low-coverage + - glimpse + - imputation +tools: + - "glimpse2": + description: "GLIMPSE2 is a phasing and imputation method for large-scale low-coverage sequencing studies." 
+ homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: "['MIT']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_list: + type: file + description: VCF/BCF file containing genotype probabilities (GP field). + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - input_index: + type: file + description: Index file of the input VCF/BCF file containing genotype likelihoods. + pattern: "*.{csi,tbi}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - merged_variants: + type: file + description: Output ligated (phased) file in VCF/BCF format. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/shapeit5/ligate/meta.yml b/modules/nf-core/shapeit5/ligate/meta.yml index ab68274e..ed1e5e9e 100644 --- a/modules/nf-core/shapeit5/ligate/meta.yml +++ b/modules/nf-core/shapeit5/ligate/meta.yml @@ -12,8 +12,8 @@ tools: homepage: "https://odelaneau.github.io/shapeit5/" documentation: "https://odelaneau.github.io/shapeit5/docs/documentation" tool_dev_url: "https://github.com/odelaneau/shapeit5" - doi: "10.1101/2022.10.19.512867 " - licence: "['MIT']" + doi: "10.1101/2022.10.19.512867" + licence: ["MIT"] input: - meta: type: map diff --git a/subworkflows/nf-core/vcf_impute_glimpse/main.nf b/subworkflows/nf-core/vcf_impute_glimpse/main.nf index 5fe26551..94262e34 100644 --- a/subworkflows/nf-core/vcf_impute_glimpse/main.nf +++ b/subworkflows/nf-core/vcf_impute_glimpse/main.nf @@ -33,11 +33,11 @@ workflow VCF_IMPUTE_GLIMPSE { GLIMPSE_PHASE ( phase_input ) // [meta, vcf, index, 
sample_infos, regionin, regionout, ref, ref_index, map] ch_versions = ch_versions.mix(GLIMPSE_PHASE.out.versions ) - INDEX_PHASE ( GLIMPSE_PHASE.out.phased_variant ) + INDEX_PHASE ( GLIMPSE_PHASE.out.phased_variants ) ch_versions = ch_versions.mix( INDEX_PHASE.out.versions ) // Ligate all phased files in one and index it - ligate_input = GLIMPSE_PHASE.out.phased_variant + ligate_input = GLIMPSE_PHASE.out.phased_variants .groupTuple( by: 0 ) .combine( INDEX_PHASE.out.csi .groupTuple( by: 0 ), diff --git a/subworkflows/nf-core/vcf_impute_glimpse/meta.yml b/subworkflows/nf-core/vcf_impute_glimpse/meta.yml index 6c9fe223..81b3b4d5 100644 --- a/subworkflows/nf-core/vcf_impute_glimpse/meta.yml +++ b/subworkflows/nf-core/vcf_impute_glimpse/meta.yml @@ -30,19 +30,17 @@ output: description: | Tab delimited output txt file containing buffer and imputation regions. Structure: [meta, txt] - - phased_variants: - type: file - description: | - Output VCF/BCF file containing genotype probabilities (GP field), - imputed dosages (DS field), best guess genotypes (GT field), - sampled haplotypes in the last (max 16) main iterations (HS field) and info-score. - Structure: [ val(meta), bcf ] - merged_variants: type: file description: | - Output VCF/BCF file for the merged regions. + Output phased VCF/BCF file for the merged regions. Phased information (HS field) is updated accordingly for the full region. Structure: [ val(meta), bcf ] + - merged_variants_index: + type: file + description: | + Index output of phased VCF/BCF file for the merged regions. 
+ Structure: [ val(meta), csi ] - versions: type: file description: File containing software versions diff --git a/subworkflows/nf-core/vcf_impute_glimpse/tests/main.nf.test b/subworkflows/nf-core/vcf_impute_glimpse/tests/main.nf.test index 7931f706..46db4244 100644 --- a/subworkflows/nf-core/vcf_impute_glimpse/tests/main.nf.test +++ b/subworkflows/nf-core/vcf_impute_glimpse/tests/main.nf.test @@ -3,13 +3,14 @@ nextflow_workflow { name "Test Workflow VCF_IMPUTE_GLIMPSE" script "../main.nf" workflow "VCF_IMPUTE_GLIMPSE" - tag "glimpse" - tag "glimpse/chunk" - tag "glimpse/phase" - tag "glimpse/ligate" + tag "subworkflows" + tag "bcftools/index" tag "subworkflows_nfcore" - tag "vcf_impute_glimpse" + tag "subworkflows/vcf_impute_glimpse" + tag "glimpse/phase" + tag "glimpse/ligate" + tag "glimpse/chunk" test("Should run without failures") { config "./nextflow.config" @@ -24,14 +25,14 @@ nextflow_workflow { ch_panel = Channel.fromList([ [[ ref:'ref_panel'], - file("https://github.com/nf-core/test-datasets/raw/imputation/data/panel/both/1000GP.chr21_22.noNA12878.s.bcf", + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.bcf", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/imputation/data/panel/both/1000GP.chr21_22.noNA12878.s.bcf.csi", + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.bcf.csi", checkIfExists: true)], [[ ref:'ref_panel2'], - file("https://github.com/nf-core/test-datasets/raw/imputation/data/panel/both/1000GP.chr21_22.noNA12878.s.bcf", + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.bcf", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/imputation/data/panel/both/1000GP.chr21_22.noNA12878.s.bcf.csi", + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/panel/21_22/1000GP.chr21_22.s.norel.bcf.csi", checkIfExists: true)] ]) region = Channel.fromList([ @@ -40,13 +41,13 @@ nextflow_workflow { ]) input_vcf = Channel.fromList([ - [[ id:'input'], // meta map - file("https://github.com/nf-core/test-datasets/raw/imputation/data/NA12878/both/NA12878.chr21_22.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/imputation/data/NA12878/both/NA12878.chr21_22.s.1x.vcf.gz.csi", checkIfExists: true), + [[ id:'NA12878'], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA12878/NA12878.s.1x.bcf", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA12878/NA12878.s.1x.bcf.csi", checkIfExists: true), ], - [[ id:'input2'], // meta map - file("https://github.com/nf-core/test-datasets/raw/imputation/data/NA12878/both/NA12878.chr21_22.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/imputation/data/NA12878/both/NA12878.chr21_22.s.1x.vcf.gz.csi", checkIfExists: true), + [[ id:'NA19401'], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA19401/NA19401.s.1x.bcf", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/individuals/NA19401/NA19401.s.1x.bcf.csi", checkIfExists: true), ] ]) input_vcf_multiple = input_vcf @@ -58,10 +59,10 @@ nextflow_workflow { ch_map = Channel.fromList([ [[ chr: "chr21"], - file("https://github.com/nf-core/test-datasets/raw/imputation/data/genetic_maps.b38/chr21.b38.gmap.gz", checkIfExists: true) + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/GRCh38.chr21.s.map", checkIfExists: true) ], [[ chr: "chr22"], - 
file("https://github.com/nf-core/test-datasets/raw/imputation/data/genetic_maps.b38/chr22.b38.gmap.gz", checkIfExists: true) + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/GRCh38.chr22.s.map", checkIfExists: true) ] ]) @@ -85,17 +86,7 @@ nextflow_workflow { } then { - println(workflow.out.merged_variants) - String targetFileName = "input_chr21_ref_panel2_ligate.vcf.gz" - File selectedFile = workflow.out.merged_variants.stream() - .filter(vector -> vector.size() > 1) - .map(vector -> new File(vector.get(1).toString())) - .filter(file -> file.getName().equals(targetFileName)) - .findFirst() - .orElse(null) - String selectedFilename = selectedFile != null ? selectedFile.getPath() : null - def lines = path(selectedFilename).linesGzip.last() - print(lines) + def lines = path(workflow.out.merged_variants.get(0).get(1)).linesGzip.last() assertAll( { assert workflow.success }, { assert snapshot(workflow.out.versions).match("versions") }, diff --git a/subworkflows/nf-core/vcf_impute_glimpse/tests/main.nf.test.snap b/subworkflows/nf-core/vcf_impute_glimpse/tests/main.nf.test.snap index 9265c429..287930cd 100644 --- a/subworkflows/nf-core/vcf_impute_glimpse/tests/main.nf.test.snap +++ b/subworkflows/nf-core/vcf_impute_glimpse/tests/main.nf.test.snap @@ -4,79 +4,83 @@ [ [ { - "id": "input", + "id": "NA12878", "chr": "chr21", "region": "chr21:16600000-16800000", "ref": "ref_panel2" }, - "input_chr21:16600000-16800000_chunk.txt:md5,426f0f75efed96818656876116f73b06" + "NA12878_chr21:16600000-16800000_chunk.txt:md5,775240b195e782b3b83adf52e0d17089" ], [ { - "id": "input", + "id": "NA12878", "chr": "chr21", "region": "chr21:16600000-16800000", "ref": "ref_panel" }, - "input_chr21:16600000-16800000_chunk.txt:md5,426f0f75efed96818656876116f73b06" + "NA12878_chr21:16600000-16800000_chunk.txt:md5,775240b195e782b3b83adf52e0d17089" ], [ { - "id": "input", + "id": "NA12878", "chr": "chr22", "region": "chr22:16600000-16800000", "ref": 
"ref_panel2" }, - "input_chr22:16600000-16800000_chunk.txt:md5,4f48d33fd9c1e235ccfd25827a92ddd1" + "NA12878_chr22:16600000-16800000_chunk.txt:md5,f5270ed0faa4f9697618444b267442ce" ], [ { - "id": "input", + "id": "NA12878", "chr": "chr22", "region": "chr22:16600000-16800000", "ref": "ref_panel" }, - "input_chr22:16600000-16800000_chunk.txt:md5,4f48d33fd9c1e235ccfd25827a92ddd1" + "NA12878_chr22:16600000-16800000_chunk.txt:md5,f5270ed0faa4f9697618444b267442ce" ], [ { - "id": "input2", + "id": "NA19401", "chr": "chr21", "region": "chr21:16600000-16800000", "ref": "ref_panel2" }, - "input2_chr21:16600000-16800000_chunk.txt:md5,426f0f75efed96818656876116f73b06" + "NA19401_chr21:16600000-16800000_chunk.txt:md5,775240b195e782b3b83adf52e0d17089" ], [ { - "id": "input2", + "id": "NA19401", "chr": "chr21", "region": "chr21:16600000-16800000", "ref": "ref_panel" }, - "input2_chr21:16600000-16800000_chunk.txt:md5,426f0f75efed96818656876116f73b06" + "NA19401_chr21:16600000-16800000_chunk.txt:md5,775240b195e782b3b83adf52e0d17089" ], [ { - "id": "input2", + "id": "NA19401", "chr": "chr22", "region": "chr22:16600000-16800000", "ref": "ref_panel2" }, - "input2_chr22:16600000-16800000_chunk.txt:md5,4f48d33fd9c1e235ccfd25827a92ddd1" + "NA19401_chr22:16600000-16800000_chunk.txt:md5,f5270ed0faa4f9697618444b267442ce" ], [ { - "id": "input2", + "id": "NA19401", "chr": "chr22", "region": "chr22:16600000-16800000", "ref": "ref_panel" }, - "input2_chr22:16600000-16800000_chunk.txt:md5,4f48d33fd9c1e235ccfd25827a92ddd1" + "NA19401_chr22:16600000-16800000_chunk.txt:md5,f5270ed0faa4f9697618444b267442ce" ] ] ], - "timestamp": "2023-10-17T18:20:13.045340135" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T23:16:59.014613786" }, "versions": { "content": [ @@ -105,54 +109,38 @@ "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - 
"versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,7ae4d2b0252f9382dd08d783b7a234d2", - "versions.yml:md5,86d1ec939ded96a4ed3fb73ff83c240b", - "versions.yml:md5,86d1ec939ded96a4ed3fb73ff83c240b", - "versions.yml:md5,86d1ec939ded96a4ed3fb73ff83c240b", - "versions.yml:md5,86d1ec939ded96a4ed3fb73ff83c240b", - "versions.yml:md5,86d1ec939ded96a4ed3fb73ff83c240b", - "versions.yml:md5,86d1ec939ded96a4ed3fb73ff83c240b", - "versions.yml:md5,86d1ec939ded96a4ed3fb73ff83c240b", - "versions.yml:md5,86d1ec939ded96a4ed3fb73ff83c240b", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - 
"versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae", - "versions.yml:md5,a17a10f6eb5e140e388b92d5f19680ae" + "versions.yml:md5,a802158fea97c36620863658efb7ae68", + "versions.yml:md5,a802158fea97c36620863658efb7ae68", + "versions.yml:md5,a802158fea97c36620863658efb7ae68", + "versions.yml:md5,a802158fea97c36620863658efb7ae68", + "versions.yml:md5,a802158fea97c36620863658efb7ae68", + "versions.yml:md5,a802158fea97c36620863658efb7ae68", + "versions.yml:md5,a802158fea97c36620863658efb7ae68", + "versions.yml:md5,a802158fea97c36620863658efb7ae68", + "versions.yml:md5,e37bdea2d40f36ce8546f87c3e572c96", + "versions.yml:md5,e37bdea2d40f36ce8546f87c3e572c96", + "versions.yml:md5,e37bdea2d40f36ce8546f87c3e572c96", + "versions.yml:md5,e37bdea2d40f36ce8546f87c3e572c96", + "versions.yml:md5,e37bdea2d40f36ce8546f87c3e572c96", + "versions.yml:md5,e37bdea2d40f36ce8546f87c3e572c96", + "versions.yml:md5,e37bdea2d40f36ce8546f87c3e572c96", + "versions.yml:md5,e37bdea2d40f36ce8546f87c3e572c96" ] ], - "timestamp": "2023-10-17T18:20:06.846882518" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T23:16:58.336134471" }, "merged": { "content": [ - "chr21\t16799989\t21:16799989:T:C\tT\tC\t.\t.\tRAF=0.000468897;AF=0;INFO=1\tGT:DS:GP:HS\t0/0:0:1,0,0:0" + "chr21\t16609998\t21:16609998:A:G\tA\tG\t.\t.\tRAF=0.00125156;AF=0;INFO=1\tGT:DS:GP:HS\t0/0:0:1,0,0:0" ], - "timestamp": "2023-10-17T18:20:19.789038503" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T23:16:59.786459305" } } \ No newline at end of file diff --git a/subworkflows/nf-core/vcf_impute_glimpse/tests/tags.yml b/subworkflows/nf-core/vcf_impute_glimpse/tests/tags.yml index 34d27725..24003ec0 100644 --- a/subworkflows/nf-core/vcf_impute_glimpse/tests/tags.yml +++ b/subworkflows/nf-core/vcf_impute_glimpse/tests/tags.yml @@ -1,2 +1,2 @@ 
-vcf_impute_glimpse: +subworkflows/vcf_impute_glimpse: - subworkflows/nf-core/vcf_impute_glimpse/** diff --git a/subworkflows/nf-core/vcf_phase_shapeit5/main.nf b/subworkflows/nf-core/vcf_phase_shapeit5/main.nf index 0ddebcb3..966f9019 100644 --- a/subworkflows/nf-core/vcf_phase_shapeit5/main.nf +++ b/subworkflows/nf-core/vcf_phase_shapeit5/main.nf @@ -66,10 +66,9 @@ workflow VCF_PHASE_SHAPEIT5 { ch_ligate_input = SHAPEIT5_PHASECOMMON.out.phased_variant .join(VCF_INDEX1.out.csi, failOnMismatch:true, failOnDuplicate:true) - .view() .map{ meta, vcf, csi -> newmeta = meta + [id: meta.id.split("_")[0..-2].join("_")] - [newmeta, vcf, csi]}.view() + [newmeta, vcf, csi]} .combine(ch_chunks_number, by:0) .map{meta, vcf, csi, chunks_num -> [groupKey(meta, chunks_num), vcf, csi]} From 44eace0f43512cbff8ff5e44adb6dab1c8715474 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 21 Mar 2024 12:21:09 +0100 Subject: [PATCH 07/13] Update changelog --- CHANGELOG.md | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a44cd749..bfe3572a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,22 @@ -# nf-core/phaseimpute: Changelog - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## v1.0dev - [date] - -Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co.re/) template. 
- -### `Added` - -### `Changed` - -- [#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository -- [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed outdir from test config files -- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv - -### `Fixed` - -### `Dependencies` - -### `Deprecated` +# nf-core/phaseimpute: Changelog + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v1.0dev - [date] + +Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co.re/) template. + +### `Added` + +### `Changed` + +- [#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository +- [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed outdir from test config files +- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1 and shapeit5) + +### `Fixed` + +### `Dependencies` + +### `Deprecated` From f78f182f28a3cfc0b6506a99a3fbdb64126473e3 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 21 Mar 2024 12:25:29 +0100 Subject: [PATCH 08/13] Prettify --- CHANGELOG.md | 44 +++++------ modules/nf-core/glimpse2/ligate/meta.yml | 98 ++++++++++++------------ 2 files changed, 71 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bfe3572a..c728319e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,22 @@ -# nf-core/phaseimpute: Changelog - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## v1.0dev - [date] - -Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co.re/) template. 
- -### `Added` - -### `Changed` - -- [#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository -- [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed outdir from test config files -- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1 and shapeit5) - -### `Fixed` - -### `Dependencies` - -### `Deprecated` +# nf-core/phaseimpute: Changelog + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v1.0dev - [date] + +Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co.re/) template. + +### `Added` + +### `Changed` + +- [#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository +- [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed outdir from test config files +- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1 and shapeit5) + +### `Fixed` + +### `Dependencies` + +### `Deprecated` diff --git a/modules/nf-core/glimpse2/ligate/meta.yml b/modules/nf-core/glimpse2/ligate/meta.yml index 2da33567..ffedb344 100644 --- a/modules/nf-core/glimpse2/ligate/meta.yml +++ b/modules/nf-core/glimpse2/ligate/meta.yml @@ -1,49 +1,49 @@ -name: "glimpse2_ligate" -description: | - Ligatation of multiple phased BCF/VCF files into a single whole chromosome file. - GLIMPSE2 is run in chunks that are ligated into chromosome-wide files maintaining the phasing. -keywords: - - ligate - - low-coverage - - glimpse - - imputation -tools: - - "glimpse2": - description: "GLIMPSE2 is a phasing and imputation method for large-scale low-coverage sequencing studies." 
- homepage: "https://odelaneau.github.io/GLIMPSE" - documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" - tool_dev_url: "https://github.com/odelaneau/GLIMPSE" - doi: "10.1038/s41588-020-00756-0" - licence: "['MIT']" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_list: - type: file - description: VCF/BCF file containing genotype probabilities (GP field). - pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" - - input_index: - type: file - description: Index file of the input VCF/BCF file containing genotype likelihoods. - pattern: "*.{csi,tbi}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - merged_variants: - type: file - description: Output ligated (phased) file in VCF/BCF format. - pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" -authors: - - "@louislenezet" -maintainers: - - "@louislenezet" +name: "glimpse2_ligate" +description: | + Ligatation of multiple phased BCF/VCF files into a single whole chromosome file. + GLIMPSE2 is run in chunks that are ligated into chromosome-wide files maintaining the phasing. +keywords: + - ligate + - low-coverage + - glimpse + - imputation +tools: + - "glimpse2": + description: "GLIMPSE2 is a phasing and imputation method for large-scale low-coverage sequencing studies." + homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: "['MIT']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_list: + type: file + description: VCF/BCF file containing genotype probabilities (GP field). 
+ pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - input_index: + type: file + description: Index file of the input VCF/BCF file containing genotype likelihoods. + pattern: "*.{csi,tbi}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - merged_variants: + type: file + description: Output ligated (phased) file in VCF/BCF format. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" From 7735feabb98d031f4afca2c5acb594688dba74fb Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 21 Mar 2024 12:28:31 +0100 Subject: [PATCH 09/13] Update glimpse2 --- modules/nf-core/glimpse2/ligate/meta.yml | 2 +- .../glimpse2/ligate/tests/main.nf.test | 76 +++ .../glimpse2/ligate/tests/main.nf.test.snap | 14 + .../nf-core/glimpse2/ligate/tests/tags.yml | 2 + subworkflows/local/compute_gl/main.nf | 76 +-- subworkflows/local/get_panel/main.nf | 180 ++--- .../utils_nfcore_phaseimpute_pipeline/main.nf | 624 +++++++++--------- 7 files changed, 533 insertions(+), 441 deletions(-) create mode 100644 modules/nf-core/glimpse2/ligate/tests/main.nf.test create mode 100644 modules/nf-core/glimpse2/ligate/tests/main.nf.test.snap create mode 100644 modules/nf-core/glimpse2/ligate/tests/tags.yml diff --git a/modules/nf-core/glimpse2/ligate/meta.yml b/modules/nf-core/glimpse2/ligate/meta.yml index ffedb344..7c07973f 100644 --- a/modules/nf-core/glimpse2/ligate/meta.yml +++ b/modules/nf-core/glimpse2/ligate/meta.yml @@ -14,7 +14,7 @@ tools: documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" tool_dev_url: "https://github.com/odelaneau/GLIMPSE" doi: "10.1038/s41588-020-00756-0" - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map diff --git a/modules/nf-core/glimpse2/ligate/tests/main.nf.test b/modules/nf-core/glimpse2/ligate/tests/main.nf.test new 
file mode 100644 index 00000000..d45c448b --- /dev/null +++ b/modules/nf-core/glimpse2/ligate/tests/main.nf.test @@ -0,0 +1,76 @@ +nextflow_process { + + name "Test Process GLIMPSE2_LIGATE" + script "../main.nf" + + process "GLIMPSE2_LIGATE" + + tag "modules_nfcore" + tag "modules" + tag "glimpse2" + tag "glimpse2/ligate" + tag "bcftools/index" + tag "glimpse2/phase" + + test("Should run glimpse ligate") { + setup { + run("GLIMPSE2_PHASE") { + script "../../phase/main.nf" + process { + """ + input_vcf = Channel.of([ + [ id:'input' ], // meta map + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), + [], + "chr21:16600000-16800000", + "chr21:16650000-16750000" + ]) + + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_vcf + .combine(ref_panel) + .combine(map_file) + input[1] = Channel.of([[],[],[]]) + """ + } + } + run("BCFTOOLS_INDEX") { + script "../../../bcftools/index/main.nf" + process { + """ + input[0] = GLIMPSE2_PHASE.out.phased_variants + """ + } + } + } + + when { + process { + """ + input[0] = GLIMPSE2_PHASE.out.phased_variants + | groupTuple() + | join (BCFTOOLS_INDEX.out.csi.groupTuple()) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert file(process.out.merged_variants[0][1]).name == "input.vcf.gz" } + ) + } + + } +} diff --git 
a/modules/nf-core/glimpse2/ligate/tests/main.nf.test.snap b/modules/nf-core/glimpse2/ligate/tests/main.nf.test.snap new file mode 100644 index 00000000..a1b0b8c8 --- /dev/null +++ b/modules/nf-core/glimpse2/ligate/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,44addcaef4965ff6409a8293c5bcad84" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T15:52:19.469961519" + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/ligate/tests/tags.yml b/modules/nf-core/glimpse2/ligate/tests/tags.yml new file mode 100644 index 00000000..1613896f --- /dev/null +++ b/modules/nf-core/glimpse2/ligate/tests/tags.yml @@ -0,0 +1,2 @@ +glimpse2/ligate: + - modules/nf-core/glimpse2/ligate/** diff --git a/subworkflows/local/compute_gl/main.nf b/subworkflows/local/compute_gl/main.nf index 277933d1..2011561d 100644 --- a/subworkflows/local/compute_gl/main.nf +++ b/subworkflows/local/compute_gl/main.nf @@ -1,38 +1,38 @@ -include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main.nf' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' - - -workflow COMPUTE_GL { - - take: - ch_input // channel: [ [id, ref], bam, bai ] - ch_target // channel: [ [panel], sites, tsv] - ch_fasta // channel: [ [ref], fasta, fai] - - main: - - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - - ch_mpileup = ch_input - .combine(ch_target) - .map{metaI, bam, bai, metaP, sites, tsv -> - [metaI + metaP, bam, sites, tsv]} - - BCFTOOLS_MPILEUP( - ch_mpileup, - ch_fasta, - false - ) - ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions.first()) - - ch_output = BCFTOOLS_MPILEUP.out.vcf - .combine(BCFTOOLS_MPILEUP.out.tbi, by:0) - - ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_MPILEUP.out.stats.map{ it[1] }) - - emit: - vcf = ch_output // channel: [ [id, panel], vcf, tbi ] - versions = ch_versions // channel: [ versions.yml ] 
- multiqc_files = ch_multiqc_files -} +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main.nf' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' + + +workflow COMPUTE_GL { + + take: + ch_input // channel: [ [id, ref], bam, bai ] + ch_target // channel: [ [panel], sites, tsv] + ch_fasta // channel: [ [ref], fasta, fai] + + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + ch_mpileup = ch_input + .combine(ch_target) + .map{metaI, bam, bai, metaP, sites, tsv -> + [metaI + metaP, bam, sites, tsv]} + + BCFTOOLS_MPILEUP( + ch_mpileup, + ch_fasta, + false + ) + ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions.first()) + + ch_output = BCFTOOLS_MPILEUP.out.vcf + .combine(BCFTOOLS_MPILEUP.out.tbi, by:0) + + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_MPILEUP.out.stats.map{ it[1] }) + + emit: + vcf = ch_output // channel: [ [id, panel], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] + multiqc_files = ch_multiqc_files +} diff --git a/subworkflows/local/get_panel/main.nf b/subworkflows/local/get_panel/main.nf index 4539647b..69ffcb8f 100644 --- a/subworkflows/local/get_panel/main.nf +++ b/subworkflows/local/get_panel/main.nf @@ -1,90 +1,90 @@ -include { BCFTOOLS_VIEW as VIEW_VCF_SNPS } from '../../../modules/nf-core/bcftools/view/main.nf' -include { BCFTOOLS_VIEW as VIEW_VCF_SITES } from '../../../modules/nf-core/bcftools/view/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX1 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX3 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX4 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX5 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main.nf' -include { BCFTOOLS_QUERY } from 
'../../../modules/nf-core/bcftools/query/main.nf' -include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' -include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/nf-core/vcf_phase_shapeit5/main' - - -workflow GET_PANEL { - take: - ch_vcf // channel: [ [id], vcf, index ] - ch_fasta // channel: [ [genome], fasta, fai ] - - main: - - ch_versions = Channel.empty() - - BCFTOOLS_NORM(ch_vcf, ch_fasta.map{ genome, fasta, fai -> [genome, fasta] }) - ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) - - // Extract only the SNP - VIEW_VCF_SNPS(BCFTOOLS_NORM.out.vcf // [ meta, vcf ] - .combine(Channel.of([[]])), [], [], []) - ch_versions = ch_versions.mix(VIEW_VCF_SNPS.out.versions.first()) - - VCF_INDEX3(VIEW_VCF_SNPS.out.vcf) - ch_versions = ch_versions.mix(VCF_INDEX3.out.versions.first()) - - ch_panel_norm = VIEW_VCF_SNPS.out.vcf - .combine(VCF_INDEX3.out.csi, by:0) - - // Extract sites positions - vcf_region = VIEW_VCF_SNPS.out.vcf - .combine(VCF_INDEX3.out.csi, by:0) - VIEW_VCF_SITES( ch_panel_norm, - [], [], []) - ch_versions = ch_versions.mix(VIEW_VCF_SITES.out.versions.first()) - - VCF_INDEX4(VIEW_VCF_SITES.out.vcf) - ch_versions = ch_versions.mix(VCF_INDEX4.out.versions.first()) - - ch_panel_sites = VIEW_VCF_SITES.out.vcf - .combine(VCF_INDEX4.out.csi, by:0) - - // Convert to TSV - BCFTOOLS_QUERY(ch_panel_sites, - [], [], []) - ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) - - TABIX_BGZIP(BCFTOOLS_QUERY.out.output) - ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) - - TABIX_TABIX(TABIX_BGZIP.out.output) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) - - ch_panel_tsv = TABIX_BGZIP.out.output - .combine(TABIX_TABIX.out.tbi, by: 0) - - // Phase panel - if (params.phased == false) { - VCF_PHASE_SHAPEIT5(vcf_region - .map { meta, vcf, csi -> [meta, vcf, csi, [], meta.region] }, - 
Channel.of([[],[],[]]).collect(), - Channel.of([[],[],[]]).collect(), - Channel.of([[],[]]).collect()) - ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions.first()) - ch_panel_phased = VCF_PHASE_SHAPEIT5.out.variants_phased - .combine(VCF_PHASE_SHAPEIT5.out.variants_index, by: 0) - } else { - ch_panel_phased = VIEW_VCF_SNPS.out.vcf - .combine(VCF_INDEX3.out.csi, by: 0) - } - - ch_panel = ch_panel_norm - .combine(ch_panel_sites, by: 0) - .combine(ch_panel_tsv, by: 0) - .combine(ch_panel_phased, by: 0) - .map{ metaI, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [[panel:metaI.id], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] - } - - emit: - panel = ch_panel // channel: [ [panel], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] - versions = ch_versions // channel: [ versions.yml ] -} +include { BCFTOOLS_VIEW as VIEW_VCF_SNPS } from '../../../modules/nf-core/bcftools/view/main.nf' +include { BCFTOOLS_VIEW as VIEW_VCF_SITES } from '../../../modules/nf-core/bcftools/view/main.nf' +include { BCFTOOLS_INDEX as VCF_INDEX1 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as VCF_INDEX3 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as VCF_INDEX4 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as VCF_INDEX5 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main.nf' +include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query/main.nf' +include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' +include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/nf-core/vcf_phase_shapeit5/main' + + +workflow GET_PANEL { + take: + ch_vcf // channel: [ [id], vcf, index ] + ch_fasta // channel: [ [genome], fasta, fai ] + + main: + + ch_versions = 
Channel.empty() + + BCFTOOLS_NORM(ch_vcf, ch_fasta.map{ genome, fasta, fai -> [genome, fasta] }) + ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) + + // Extract only the SNP + VIEW_VCF_SNPS(BCFTOOLS_NORM.out.vcf // [ meta, vcf ] + .combine(Channel.of([[]])), [], [], []) + ch_versions = ch_versions.mix(VIEW_VCF_SNPS.out.versions.first()) + + VCF_INDEX3(VIEW_VCF_SNPS.out.vcf) + ch_versions = ch_versions.mix(VCF_INDEX3.out.versions.first()) + + ch_panel_norm = VIEW_VCF_SNPS.out.vcf + .combine(VCF_INDEX3.out.csi, by:0) + + // Extract sites positions + vcf_region = VIEW_VCF_SNPS.out.vcf + .combine(VCF_INDEX3.out.csi, by:0) + VIEW_VCF_SITES( ch_panel_norm, + [], [], []) + ch_versions = ch_versions.mix(VIEW_VCF_SITES.out.versions.first()) + + VCF_INDEX4(VIEW_VCF_SITES.out.vcf) + ch_versions = ch_versions.mix(VCF_INDEX4.out.versions.first()) + + ch_panel_sites = VIEW_VCF_SITES.out.vcf + .combine(VCF_INDEX4.out.csi, by:0) + + // Convert to TSV + BCFTOOLS_QUERY(ch_panel_sites, + [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) + + TABIX_BGZIP(BCFTOOLS_QUERY.out.output) + ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) + + TABIX_TABIX(TABIX_BGZIP.out.output) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) + + ch_panel_tsv = TABIX_BGZIP.out.output + .combine(TABIX_TABIX.out.tbi, by: 0) + + // Phase panel + if (params.phased == false) { + VCF_PHASE_SHAPEIT5(vcf_region + .map { meta, vcf, csi -> [meta, vcf, csi, [], meta.region] }, + Channel.of([[],[],[]]).collect(), + Channel.of([[],[],[]]).collect(), + Channel.of([[],[]]).collect()) + ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions.first()) + ch_panel_phased = VCF_PHASE_SHAPEIT5.out.variants_phased + .combine(VCF_PHASE_SHAPEIT5.out.variants_index, by: 0) + } else { + ch_panel_phased = VIEW_VCF_SNPS.out.vcf + .combine(VCF_INDEX3.out.csi, by: 0) + } + + ch_panel = ch_panel_norm + .combine(ch_panel_sites, by: 0) + 
.combine(ch_panel_tsv, by: 0) + .combine(ch_panel_phased, by: 0) + .map{ metaI, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + -> [[panel:metaI.id], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] + } + + emit: + panel = ch_panel // channel: [ [panel], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 164b78d5..2a26704d 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -1,312 +1,312 @@ -// -// Subworkflow with functionality specific to the nf-core/phaseimpute pipeline -// - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { fromSamplesheet } from 'plugin/nf-validation' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' -include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' -include { GET_REGION } from '../get_region' -include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' - -/* 
-======================================================================================== - SUBWORKFLOW TO INITIALISE PIPELINE -======================================================================================== -*/ - -workflow PIPELINE_INITIALISATION { - - take: - version // boolean: Display version and exit - help // boolean: Display help text - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs - nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet - - main: - - ch_versions = Channel.empty() - - // - // Print version and exit if required and dump pipeline parameters to JSON file - // - UTILS_NEXTFLOW_PIPELINE ( - version, - true, - outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 - ) - - // - // Validate parameters and generate parameter summary to stdout - // - pre_help_text = nfCoreLogo(monochrome_logs) - post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - UTILS_NFVALIDATION_PLUGIN ( - help, - workflow_command, - pre_help_text, - post_help_text, - validate_params, - "nextflow_schema.json" - ) - - // - // Check config provided to the pipeline - // - UTILS_NFCORE_PIPELINE ( - nextflow_cli_args - ) - // - // Custom validation for pipeline parameters - // - validateInputParameters() - - // - // Create fasta channel - // - genome = params.genome ? 
params.genome : file(params.fasta, checkIfExists:true).getBaseName() - if (params.genome) { - genome = params.genome - ch_fasta = Channel.of([[genome:genome], getGenomeAttribute('fasta')]) - fai = getGenomeAttribute('fai') - if (fai == null) { - SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) - fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } - } - } else if (params.fasta) { - genome = file(params.fasta, checkIfExists:true).getBaseName() - ch_fasta = Channel.of([[genome:genome], file(params.fasta, checkIfExists:true)]) - if (params.fasta_fai) { - fai = file(params.fasta_fai, checkIfExists:true) - } else { - SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) - fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } - } - } - ch_ref_gen = ch_fasta.combine(fai) - - // - // Create map channel - // - ch_map = params.map ? - Channel.of([["map": params.map], params.map]) : - Channel.of([[],[]]) - - // - // Create channel from input file provided through params.input - // - ch_input = Channel - .fromSamplesheet("input") - .map { - meta, bam, bai -> - [ meta, bam, bai ] - } - - // - // Create channel for panel - // - if (params.panel) { - ch_panel = Channel - .fromSamplesheet("panel") - .map { - meta,vcf,index,sites,tsv,legend,phased -> - [ meta, vcf, index ] - } - } - - // - // Create channel from region input - // - if (params.input_region) { - if (params.input_region.endsWith(".csv")) { - println "Region file provided as input is a csv file" - ch_regions = Channel.fromSamplesheet("input_region") - .map{ chr, start, end -> [["chr": chr], chr + ":" + start + "-" + end]} - .map{ metaC, region -> [metaC + ["region": region], region]} - } else { - println "Region file provided is a single region" - GET_REGION ( - params.input_region, - ch_ref_gen - ) - ch_versions = ch_versions.mix(GET_REGION.out.versions.first()) - ch_regions = GET_REGION.out.regions - } - } - - 
emit: - input = ch_input // [ [meta], bam, bai ] - fasta = ch_ref_gen // [ [genome], fasta, fai ] - panel = ch_panel // [ [panel], panel ] - regions = ch_regions // [ [chr, region], region ] - map = ch_map // [ [map], map ] - versions = ch_versions -} - -/* -======================================================================================== - SUBWORKFLOW FOR PIPELINE COMPLETION -======================================================================================== -*/ - -workflow PIPELINE_COMPLETION { - - take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure - plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published - monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications - multiqc_report // string: Path to MultiQC report - - main: - - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - - // - // Completion email and summary - // - workflow.onComplete { - if (email || email_on_fail) { - completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) - } - - completionSummary(monochrome_logs) - - if (hook_url) { - imNotification(summary_params, hook_url) - } - } -} - -/* -======================================================================================== - FUNCTIONS -======================================================================================== -*/ -// -// Check and validate pipeline parameters -// -def validateInputParameters() { - genomeExistsError() - // Check that only genome or fasta is provided - assert params.genome == null || params.fasta == null, "Either --genome or --fasta must be provided" - assert !(params.genome == null && params.fasta == null), "Only one of --genome or --fasta must be provided" - - // Check that a step is provided - assert 
params.step, "A step must be provided" - - // Check that at least one tool is provided - assert params.tools, "No tools provided" -} - -// -// Validate channels from input samplesheet -// -def validateInputSamplesheet(input) { - def (meta, bam, bai) = input - // Check that individual IDs are unique - // no validation for the moment -} -// -// Get attribute from genome config file e.g. fasta -// -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null -} - -// -// Exit pipeline if incorrect --genome key provided -// -def genomeExistsError() { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - error(error_string) - } -} - -// -// Generate methods description for MultiQC -// -def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text -} - -def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text -} - -def methodsDescriptionText(mqc_methods_yaml) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = workflow.toMap() - meta["manifest_map"] = workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - // meta["tool_bibliography"] = toolBibliographyText() - - - def methods_text = mqc_methods_yaml.text - - def engine = new groovy.text.SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html.toString() -} +// +// Subworkflow with functionality specific to the nf-core/phaseimpute pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' +include { GET_REGION } from '../get_region' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE 
+======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create fasta channel + // + genome = params.genome ? 
params.genome : file(params.fasta, checkIfExists:true).getBaseName() + if (params.genome) { + genome = params.genome + ch_fasta = Channel.of([[genome:genome], getGenomeAttribute('fasta')]) + fai = getGenomeAttribute('fai') + if (fai == null) { + SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) + fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } + } + } else if (params.fasta) { + genome = file(params.fasta, checkIfExists:true).getBaseName() + ch_fasta = Channel.of([[genome:genome], file(params.fasta, checkIfExists:true)]) + if (params.fasta_fai) { + fai = file(params.fasta_fai, checkIfExists:true) + } else { + SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) + fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } + } + } + ch_ref_gen = ch_fasta.combine(fai) + + // + // Create map channel + // + ch_map = params.map ? + Channel.of([["map": params.map], params.map]) : + Channel.of([[],[]]) + + // + // Create channel from input file provided through params.input + // + ch_input = Channel + .fromSamplesheet("input") + .map { + meta, bam, bai -> + [ meta, bam, bai ] + } + + // + // Create channel for panel + // + if (params.panel) { + ch_panel = Channel + .fromSamplesheet("panel") + .map { + meta,vcf,index,sites,tsv,legend,phased -> + [ meta, vcf, index ] + } + } + + // + // Create channel from region input + // + if (params.input_region) { + if (params.input_region.endsWith(".csv")) { + println "Region file provided as input is a csv file" + ch_regions = Channel.fromSamplesheet("input_region") + .map{ chr, start, end -> [["chr": chr], chr + ":" + start + "-" + end]} + .map{ metaC, region -> [metaC + ["region": region], region]} + } else { + println "Region file provided is a single region" + GET_REGION ( + params.input_region, + ch_ref_gen + ) + ch_versions = ch_versions.mix(GET_REGION.out.versions.first()) + ch_regions = GET_REGION.out.regions + } + } + + 
emit: + input = ch_input // [ [meta], bam, bai ] + fasta = ch_ref_gen // [ [genome], fasta, fai ] + panel = ch_panel // [ [panel], panel ] + regions = ch_regions // [ [chr, region], region ] + map = ch_map // [ [map], map ] + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() + // Check that only genome or fasta is provided + assert params.genome == null || params.fasta == null, "Either --genome or --fasta must be provided" + assert !(params.genome == null && params.fasta == null), "Only one of --genome or --fasta must be provided" + + // Check that a step is provided + assert 
params.step, "A step must be provided" + + // Check that at least one tool is provided + assert params.tools, "No tools provided" +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (meta, bam, bai) = input + // Check that individual IDs are unique + // no validation for the moment +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} From e9b99dad819365511c3374f79df895ede0f89804 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 21 Mar 2024 12:45:15 +0100 Subject: [PATCH 10/13] Update files --- CHANGELOG.md | 2 +- nextflow_schema.json | 28 ++++++++-------------------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c728319e..b96488af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository - [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed outdir from test config files -- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1 and shapeit5) +- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1/2 and shapeit5) ### `Fixed` diff --git a/nextflow_schema.json b/nextflow_schema.json index aee0a340..fa8974a3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -37,9 +37,9 @@ "description": "Path to the reference panel or csv file with the list of panels", "fa_icon": "far fa-file-code", "format": "file-path", + "schema": "assets/schema_input_panel.json", "pattern": 
"^\\S+\\.(csv|tsv|txt)$", - "mimetype": "text/csv", - "schema": "assets/schema_input_panel.json" + "mimetype": "text/csv" }, "panel_index": { "type": "string", @@ -83,22 +83,9 @@ "input_region": { "type": "string", "description": "Region of the genome to use, can be the entire genome (i.e. 'all') or a specific chromosome or region (e.g. 'chr1', 'chr1:1000-2000'). You can also specify a file containing a list of regions to process, one per line. The file should be a comma-separated file with 3 columns, and a header row.", - "anyOf": [ - { - "pattern": "all" - }, - { - "pattern": "^(chr)?[0-9XYM]+$" - }, - { - "pattern": "^(chr)?[0-9XYM]+:[0-9]+-[0-9]+$" - }, - { - "format": "file-path", - "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input_region.json" - } - ] + "schema": "assets/schema_input_region.json", + "format": "file-path", + "pattern": "^\\S+\\.csv$" }, "outdir": { "type": "string", @@ -169,10 +156,11 @@ "format": "file-path", "exists": true, "mimetype": "text/plain", - "pattern": "^\\S+\\.gmap(sta)?(\\.gz)?$", "description": "Path to gmap genome file.", "help_text": "This parameter is *optional*. 
This is used to refine the imputation process to match the recombination event rate in your specie.", - "fa_icon": "far fa-file-code" + "fa_icon": "far fa-file-code", + "mimetype": "text/csv", + "schema": "assets/schema_map.json" }, "igenomes_ignore": { "type": "boolean", From eecde608a2a1da8a922062e9d4edf4b456a0f61f Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 21 Mar 2024 12:48:03 +0100 Subject: [PATCH 11/13] Update modules.json --- modules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 77bda261..0a99d93d 100644 --- a/modules.json +++ b/modules.json @@ -70,7 +70,7 @@ }, "glimpse2/ligate": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "ee7fee68281944b002bd27a8ff3f19200b4d3fad", "installed_by": ["multiple_impute_glimpse2"] }, "glimpse2/phase": { @@ -136,7 +136,7 @@ "nf-core": { "multiple_impute_glimpse2": { "branch": "master", - "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { From 2fa485cbb11de990bb62610c70c6cddaf6558112 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 21 Mar 2024 12:54:47 +0100 Subject: [PATCH 12/13] Fix linting with eclint --- subworkflows/local/compute_gl/main.nf | 76 +-- subworkflows/local/get_panel/main.nf | 180 ++--- .../utils_nfcore_phaseimpute_pipeline/main.nf | 624 +++++++++--------- 3 files changed, 440 insertions(+), 440 deletions(-) diff --git a/subworkflows/local/compute_gl/main.nf b/subworkflows/local/compute_gl/main.nf index 2011561d..277933d1 100644 --- a/subworkflows/local/compute_gl/main.nf +++ b/subworkflows/local/compute_gl/main.nf @@ -1,38 +1,38 @@ -include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main.nf' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' - - -workflow COMPUTE_GL { - - take: - ch_input // channel: [ [id, 
ref], bam, bai ] - ch_target // channel: [ [panel], sites, tsv] - ch_fasta // channel: [ [ref], fasta, fai] - - main: - - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - - ch_mpileup = ch_input - .combine(ch_target) - .map{metaI, bam, bai, metaP, sites, tsv -> - [metaI + metaP, bam, sites, tsv]} - - BCFTOOLS_MPILEUP( - ch_mpileup, - ch_fasta, - false - ) - ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions.first()) - - ch_output = BCFTOOLS_MPILEUP.out.vcf - .combine(BCFTOOLS_MPILEUP.out.tbi, by:0) - - ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_MPILEUP.out.stats.map{ it[1] }) - - emit: - vcf = ch_output // channel: [ [id, panel], vcf, tbi ] - versions = ch_versions // channel: [ versions.yml ] - multiqc_files = ch_multiqc_files -} +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main.nf' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' + + +workflow COMPUTE_GL { + + take: + ch_input // channel: [ [id, ref], bam, bai ] + ch_target // channel: [ [panel], sites, tsv] + ch_fasta // channel: [ [ref], fasta, fai] + + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + ch_mpileup = ch_input + .combine(ch_target) + .map{metaI, bam, bai, metaP, sites, tsv -> + [metaI + metaP, bam, sites, tsv]} + + BCFTOOLS_MPILEUP( + ch_mpileup, + ch_fasta, + false + ) + ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions.first()) + + ch_output = BCFTOOLS_MPILEUP.out.vcf + .combine(BCFTOOLS_MPILEUP.out.tbi, by:0) + + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_MPILEUP.out.stats.map{ it[1] }) + + emit: + vcf = ch_output // channel: [ [id, panel], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] + multiqc_files = ch_multiqc_files +} diff --git a/subworkflows/local/get_panel/main.nf b/subworkflows/local/get_panel/main.nf index 69ffcb8f..4539647b 100644 --- a/subworkflows/local/get_panel/main.nf +++ b/subworkflows/local/get_panel/main.nf @@ 
-1,90 +1,90 @@ -include { BCFTOOLS_VIEW as VIEW_VCF_SNPS } from '../../../modules/nf-core/bcftools/view/main.nf' -include { BCFTOOLS_VIEW as VIEW_VCF_SITES } from '../../../modules/nf-core/bcftools/view/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX1 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX3 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX4 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX5 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main.nf' -include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query/main.nf' -include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' -include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/nf-core/vcf_phase_shapeit5/main' - - -workflow GET_PANEL { - take: - ch_vcf // channel: [ [id], vcf, index ] - ch_fasta // channel: [ [genome], fasta, fai ] - - main: - - ch_versions = Channel.empty() - - BCFTOOLS_NORM(ch_vcf, ch_fasta.map{ genome, fasta, fai -> [genome, fasta] }) - ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) - - // Extract only the SNP - VIEW_VCF_SNPS(BCFTOOLS_NORM.out.vcf // [ meta, vcf ] - .combine(Channel.of([[]])), [], [], []) - ch_versions = ch_versions.mix(VIEW_VCF_SNPS.out.versions.first()) - - VCF_INDEX3(VIEW_VCF_SNPS.out.vcf) - ch_versions = ch_versions.mix(VCF_INDEX3.out.versions.first()) - - ch_panel_norm = VIEW_VCF_SNPS.out.vcf - .combine(VCF_INDEX3.out.csi, by:0) - - // Extract sites positions - vcf_region = VIEW_VCF_SNPS.out.vcf - .combine(VCF_INDEX3.out.csi, by:0) - VIEW_VCF_SITES( ch_panel_norm, - [], [], []) - ch_versions = ch_versions.mix(VIEW_VCF_SITES.out.versions.first()) - - VCF_INDEX4(VIEW_VCF_SITES.out.vcf) - ch_versions = 
ch_versions.mix(VCF_INDEX4.out.versions.first()) - - ch_panel_sites = VIEW_VCF_SITES.out.vcf - .combine(VCF_INDEX4.out.csi, by:0) - - // Convert to TSV - BCFTOOLS_QUERY(ch_panel_sites, - [], [], []) - ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) - - TABIX_BGZIP(BCFTOOLS_QUERY.out.output) - ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) - - TABIX_TABIX(TABIX_BGZIP.out.output) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) - - ch_panel_tsv = TABIX_BGZIP.out.output - .combine(TABIX_TABIX.out.tbi, by: 0) - - // Phase panel - if (params.phased == false) { - VCF_PHASE_SHAPEIT5(vcf_region - .map { meta, vcf, csi -> [meta, vcf, csi, [], meta.region] }, - Channel.of([[],[],[]]).collect(), - Channel.of([[],[],[]]).collect(), - Channel.of([[],[]]).collect()) - ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions.first()) - ch_panel_phased = VCF_PHASE_SHAPEIT5.out.variants_phased - .combine(VCF_PHASE_SHAPEIT5.out.variants_index, by: 0) - } else { - ch_panel_phased = VIEW_VCF_SNPS.out.vcf - .combine(VCF_INDEX3.out.csi, by: 0) - } - - ch_panel = ch_panel_norm - .combine(ch_panel_sites, by: 0) - .combine(ch_panel_tsv, by: 0) - .combine(ch_panel_phased, by: 0) - .map{ metaI, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [[panel:metaI.id], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] - } - - emit: - panel = ch_panel // channel: [ [panel], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] - versions = ch_versions // channel: [ versions.yml ] -} +include { BCFTOOLS_VIEW as VIEW_VCF_SNPS } from '../../../modules/nf-core/bcftools/view/main.nf' +include { BCFTOOLS_VIEW as VIEW_VCF_SITES } from '../../../modules/nf-core/bcftools/view/main.nf' +include { BCFTOOLS_INDEX as VCF_INDEX1 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as VCF_INDEX3 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as VCF_INDEX4 } 
from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as VCF_INDEX5 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main.nf' +include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query/main.nf' +include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' +include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/nf-core/vcf_phase_shapeit5/main' + + +workflow GET_PANEL { + take: + ch_vcf // channel: [ [id], vcf, index ] + ch_fasta // channel: [ [genome], fasta, fai ] + + main: + + ch_versions = Channel.empty() + + BCFTOOLS_NORM(ch_vcf, ch_fasta.map{ genome, fasta, fai -> [genome, fasta] }) + ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) + + // Extract only the SNP + VIEW_VCF_SNPS(BCFTOOLS_NORM.out.vcf // [ meta, vcf ] + .combine(Channel.of([[]])), [], [], []) + ch_versions = ch_versions.mix(VIEW_VCF_SNPS.out.versions.first()) + + VCF_INDEX3(VIEW_VCF_SNPS.out.vcf) + ch_versions = ch_versions.mix(VCF_INDEX3.out.versions.first()) + + ch_panel_norm = VIEW_VCF_SNPS.out.vcf + .combine(VCF_INDEX3.out.csi, by:0) + + // Extract sites positions + vcf_region = VIEW_VCF_SNPS.out.vcf + .combine(VCF_INDEX3.out.csi, by:0) + VIEW_VCF_SITES( ch_panel_norm, + [], [], []) + ch_versions = ch_versions.mix(VIEW_VCF_SITES.out.versions.first()) + + VCF_INDEX4(VIEW_VCF_SITES.out.vcf) + ch_versions = ch_versions.mix(VCF_INDEX4.out.versions.first()) + + ch_panel_sites = VIEW_VCF_SITES.out.vcf + .combine(VCF_INDEX4.out.csi, by:0) + + // Convert to TSV + BCFTOOLS_QUERY(ch_panel_sites, + [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) + + TABIX_BGZIP(BCFTOOLS_QUERY.out.output) + ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) + + TABIX_TABIX(TABIX_BGZIP.out.output) + ch_versions = 
ch_versions.mix(TABIX_TABIX.out.versions.first()) + + ch_panel_tsv = TABIX_BGZIP.out.output + .combine(TABIX_TABIX.out.tbi, by: 0) + + // Phase panel + if (params.phased == false) { + VCF_PHASE_SHAPEIT5(vcf_region + .map { meta, vcf, csi -> [meta, vcf, csi, [], meta.region] }, + Channel.of([[],[],[]]).collect(), + Channel.of([[],[],[]]).collect(), + Channel.of([[],[]]).collect()) + ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions.first()) + ch_panel_phased = VCF_PHASE_SHAPEIT5.out.variants_phased + .combine(VCF_PHASE_SHAPEIT5.out.variants_index, by: 0) + } else { + ch_panel_phased = VIEW_VCF_SNPS.out.vcf + .combine(VCF_INDEX3.out.csi, by: 0) + } + + ch_panel = ch_panel_norm + .combine(ch_panel_sites, by: 0) + .combine(ch_panel_tsv, by: 0) + .combine(ch_panel_phased, by: 0) + .map{ metaI, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + -> [[panel:metaI.id], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] + } + + emit: + panel = ch_panel // channel: [ [panel], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 2a26704d..164b78d5 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -1,312 +1,312 @@ -// -// Subworkflow with functionality specific to the nf-core/phaseimpute pipeline -// - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { fromSamplesheet } from 'plugin/nf-validation' -include { 
UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' -include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' -include { GET_REGION } from '../get_region' -include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' - -/* -======================================================================================== - SUBWORKFLOW TO INITIALISE PIPELINE -======================================================================================== -*/ - -workflow PIPELINE_INITIALISATION { - - take: - version // boolean: Display version and exit - help // boolean: Display help text - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs - nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet - - main: - - ch_versions = Channel.empty() - - // - // Print version and exit if required and dump pipeline parameters to JSON file - // - UTILS_NEXTFLOW_PIPELINE ( - version, - true, - outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 - ) - - // - // Validate parameters and generate parameter summary to stdout - // - pre_help_text = nfCoreLogo(monochrome_logs) - post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - 
UTILS_NFVALIDATION_PLUGIN ( - help, - workflow_command, - pre_help_text, - post_help_text, - validate_params, - "nextflow_schema.json" - ) - - // - // Check config provided to the pipeline - // - UTILS_NFCORE_PIPELINE ( - nextflow_cli_args - ) - // - // Custom validation for pipeline parameters - // - validateInputParameters() - - // - // Create fasta channel - // - genome = params.genome ? params.genome : file(params.fasta, checkIfExists:true).getBaseName() - if (params.genome) { - genome = params.genome - ch_fasta = Channel.of([[genome:genome], getGenomeAttribute('fasta')]) - fai = getGenomeAttribute('fai') - if (fai == null) { - SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) - fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } - } - } else if (params.fasta) { - genome = file(params.fasta, checkIfExists:true).getBaseName() - ch_fasta = Channel.of([[genome:genome], file(params.fasta, checkIfExists:true)]) - if (params.fasta_fai) { - fai = file(params.fasta_fai, checkIfExists:true) - } else { - SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) - fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } - } - } - ch_ref_gen = ch_fasta.combine(fai) - - // - // Create map channel - // - ch_map = params.map ? 
- Channel.of([["map": params.map], params.map]) : - Channel.of([[],[]]) - - // - // Create channel from input file provided through params.input - // - ch_input = Channel - .fromSamplesheet("input") - .map { - meta, bam, bai -> - [ meta, bam, bai ] - } - - // - // Create channel for panel - // - if (params.panel) { - ch_panel = Channel - .fromSamplesheet("panel") - .map { - meta,vcf,index,sites,tsv,legend,phased -> - [ meta, vcf, index ] - } - } - - // - // Create channel from region input - // - if (params.input_region) { - if (params.input_region.endsWith(".csv")) { - println "Region file provided as input is a csv file" - ch_regions = Channel.fromSamplesheet("input_region") - .map{ chr, start, end -> [["chr": chr], chr + ":" + start + "-" + end]} - .map{ metaC, region -> [metaC + ["region": region], region]} - } else { - println "Region file provided is a single region" - GET_REGION ( - params.input_region, - ch_ref_gen - ) - ch_versions = ch_versions.mix(GET_REGION.out.versions.first()) - ch_regions = GET_REGION.out.regions - } - } - - emit: - input = ch_input // [ [meta], bam, bai ] - fasta = ch_ref_gen // [ [genome], fasta, fai ] - panel = ch_panel // [ [panel], panel ] - regions = ch_regions // [ [chr, region], region ] - map = ch_map // [ [map], map ] - versions = ch_versions -} - -/* -======================================================================================== - SUBWORKFLOW FOR PIPELINE COMPLETION -======================================================================================== -*/ - -workflow PIPELINE_COMPLETION { - - take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure - plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published - monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications - multiqc_report // string: Path to MultiQC report - - 
main: - - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - - // - // Completion email and summary - // - workflow.onComplete { - if (email || email_on_fail) { - completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) - } - - completionSummary(monochrome_logs) - - if (hook_url) { - imNotification(summary_params, hook_url) - } - } -} - -/* -======================================================================================== - FUNCTIONS -======================================================================================== -*/ -// -// Check and validate pipeline parameters -// -def validateInputParameters() { - genomeExistsError() - // Check that only genome or fasta is provided - assert params.genome == null || params.fasta == null, "Either --genome or --fasta must be provided" - assert !(params.genome == null && params.fasta == null), "Only one of --genome or --fasta must be provided" - - // Check that a step is provided - assert params.step, "A step must be provided" - - // Check that at least one tool is provided - assert params.tools, "No tools provided" -} - -// -// Validate channels from input samplesheet -// -def validateInputSamplesheet(input) { - def (meta, bam, bai) = input - // Check that individual IDs are unique - // no validation for the moment -} -// -// Get attribute from genome config file e.g. 
fasta -// -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null -} - -// -// Exit pipeline if incorrect --genome key provided -// -def genomeExistsError() { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - error(error_string) - } -} - -// -// Generate methods description for MultiQC -// -def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text -} - -def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text -} - -def methodsDescriptionText(mqc_methods_yaml) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = workflow.toMap() - meta["manifest_map"] = workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - // meta["tool_bibliography"] = toolBibliographyText() - - - def methods_text = mqc_methods_yaml.text - - def engine = new groovy.text.SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html.toString() -} +// +// Subworkflow with functionality specific to the nf-core/phaseimpute pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' +include { GET_REGION } from '../get_region' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE 
+======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create fasta channel + // + genome = params.genome ? 
params.genome : file(params.fasta, checkIfExists:true).getBaseName() + if (params.genome) { + genome = params.genome + ch_fasta = Channel.of([[genome:genome], getGenomeAttribute('fasta')]) + fai = getGenomeAttribute('fai') + if (fai == null) { + SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) + fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } + } + } else if (params.fasta) { + genome = file(params.fasta, checkIfExists:true).getBaseName() + ch_fasta = Channel.of([[genome:genome], file(params.fasta, checkIfExists:true)]) + if (params.fasta_fai) { + fai = file(params.fasta_fai, checkIfExists:true) + } else { + SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) + fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } + } + } + ch_ref_gen = ch_fasta.combine(fai) + + // + // Create map channel + // + ch_map = params.map ? + Channel.of([["map": params.map], params.map]) : + Channel.of([[],[]]) + + // + // Create channel from input file provided through params.input + // + ch_input = Channel + .fromSamplesheet("input") + .map { + meta, bam, bai -> + [ meta, bam, bai ] + } + + // + // Create channel for panel + // + if (params.panel) { + ch_panel = Channel + .fromSamplesheet("panel") + .map { + meta,vcf,index,sites,tsv,legend,phased -> + [ meta, vcf, index ] + } + } + + // + // Create channel from region input + // + if (params.input_region) { + if (params.input_region.endsWith(".csv")) { + println "Region file provided as input is a csv file" + ch_regions = Channel.fromSamplesheet("input_region") + .map{ chr, start, end -> [["chr": chr], chr + ":" + start + "-" + end]} + .map{ metaC, region -> [metaC + ["region": region], region]} + } else { + println "Region file provided is a single region" + GET_REGION ( + params.input_region, + ch_ref_gen + ) + ch_versions = ch_versions.mix(GET_REGION.out.versions.first()) + ch_regions = GET_REGION.out.regions + } + } + + 
emit: + input = ch_input // [ [meta], bam, bai ] + fasta = ch_ref_gen // [ [genome], fasta, fai ] + panel = ch_panel // [ [panel], panel ] + regions = ch_regions // [ [chr, region], region ] + map = ch_map // [ [map], map ] + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() + // Check that only genome or fasta is provided + assert params.genome == null || params.fasta == null, "Only one of --genome or --fasta must be provided" + assert !(params.genome == null && params.fasta == null), "Either --genome or --fasta must be provided" + + // Check that a step is provided + assert 
params.step, "A step must be provided" + + // Check that at least one tool is provided + assert params.tools, "No tools provided" +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (meta, bam, bai) = input + // Check that individual IDs are unique + // no validation for the moment +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} From c46364d4877268321647e4c70830cc27ed917223 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 21 Mar 2024 13:02:28 +0100 Subject: [PATCH 13/13] Update nextflow_schema --- nextflow_schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index fa8974a3..2e36b1c4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -155,7 +155,6 @@ "type": "string", "format": "file-path", "exists": true, - "mimetype": "text/plain", "description": "Path to gmap genome file.", "help_text": "This parameter is *optional*. This is used to refine the imputation process to match the recombination event rate in your specie.", "fa_icon": "far fa-file-code",