From 73c04e23c161a2aa68b120bd6737cdc7c7057a4e Mon Sep 17 00:00:00 2001 From: Louis Date: Fri, 22 Mar 2024 19:17:22 +0100 Subject: [PATCH 01/18] Add auto detection of chr prefix and auto fixing --- conf/modules.config | 4 +- modules/local/faitochr/main.nf | 23 ++++--- modules/local/vcfchrextract/environment.yml | 7 ++ modules/local/vcfchrextract/main.nf | 49 +++++++++++++ modules/local/vcfchrextract/meta.yml | 41 +++++++++++ .../local/vcfchrextract/tests/main.nf.test | 32 +++++++++ .../vcfchrextract/tests/main.nf.test.snap | 35 ++++++++++ modules/local/vcfchrextract/tests/tags.yml | 2 + nextflow.config | 2 +- nextflow_schema.json | 12 ++-- subworkflows/local/vcf_chr_rename/main.nf | 68 +++++++++++++++---- .../local/vcf_chr_rename/tests/main.nf.test | 59 +++++++++++++--- .../vcf_chr_rename/tests/main.nf.test.snap | 51 +++++++++++++- .../vcf_chr_rename/tests/nextflow.config | 1 + .../tests/nextflow_rename.config | 4 ++ workflows/phaseimpute/main.nf | 16 ++--- 16 files changed, 353 insertions(+), 53 deletions(-) create mode 100644 modules/local/vcfchrextract/environment.yml create mode 100644 modules/local/vcfchrextract/main.nf create mode 100644 modules/local/vcfchrextract/meta.yml create mode 100644 modules/local/vcfchrextract/tests/main.nf.test create mode 100644 modules/local/vcfchrextract/tests/main.nf.test.snap create mode 100644 modules/local/vcfchrextract/tests/tags.yml create mode 100644 subworkflows/local/vcf_chr_rename/tests/nextflow_rename.config diff --git a/conf/modules.config b/conf/modules.config index a04bf589..76b55b44 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -56,12 +56,12 @@ process { ext.prefix = { "${meta.id}_${meta.region}" } } - withName: BCFTOOLS_ANNOTATE { + withName: 'VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' { ext.args = [ "-Oz", "--no-version" ].join(' ') - ext.prefix = { "${meta.id}_chrDel_${meta.region}" } + ext.prefix = { "${meta.id}_chrrename" } } withName: VIEW_VCF_SNPS { diff --git a/modules/local/faitochr/main.nf 
b/modules/local/faitochr/main.nf index 9ae36d98..6c81bb6e 100644 --- a/modules/local/faitochr/main.nf +++ b/modules/local/faitochr/main.nf @@ -3,7 +3,7 @@ process FAITOCHR { label 'process_single' input: - tuple val(meta), path(fai), val(addchr) + tuple val(meta), path(fai) output: tuple val(meta), path("*.txt"), emit: annot_chr @@ -17,17 +17,24 @@ process FAITOCHR { def prefix = task.ext.prefix ?: "${meta.id}" """ - # Take the fai file and add the chr prefix to the chromosome names - if [ "${addchr}" = true ]; then + # Check if chr prefix is present in the chromosome names + col1="chr" + col2="" + if [ \$(awk 'NR==1 {print \$1}' ${fai} | grep -c '^chr') -eq 1 ]; then col1="" col2="chr" - else - col1="chr" - col2="" fi + + # Take the fai file and add/remove the chr prefix to the chromosome names + # Keep only first column, remove chr prefix if present, add chr prefix if needed + # chr prefix is added only on number only chromosome names awk -F'\t' '{print \$1}' ${fai} | \ - sed 's/chr//g' | \ - awk -v col1=\${col1} -v col2=\${col2} 'BEGIN {OFS=" "} {print col1\$1, col2\$1}' > ${prefix}.txt + sed 's/^chr//g' | \ + awk -v col1=\${col1} -v col2=\${col2} \ + 'BEGIN {OFS=" "} {if (\$1 ~ /^[0-9]+\$/) print col1\$1, col2\$1; else print \$1, \$1}' \ + > ${prefix}.txt + + # We should have a file with the chromosome names in the second column corresponding to the fai format cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/vcfchrextract/environment.yml b/modules/local/vcfchrextract/environment.yml new file mode 100644 index 00000000..e0abc8d2 --- /dev/null +++ b/modules/local/vcfchrextract/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/local/vcfchrextract/main.nf b/modules/local/vcfchrextract/main.nf new file mode 100644 index 00000000..ee58fe41 --- /dev/null +++ b/modules/local/vcfchrextract/main.nf @@ -0,0 +1,49 @@ 
+process VCFCHREXTRACT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.txt"), emit: chr + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bcftools \\ + head \\ + $input \\ + \| grep -o -E '^##contig=<ID=([^,>]*)' | cut -d'=' -f3 \\ + > ${prefix}.txt + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + grep: \$( grep --version |& sed '1!d; s/^.*grep (GNU grep) //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + grep: \$( grep --version |& sed '1!d; s/^.*grep (GNU grep) //' ) + END_VERSIONS + """ +} diff --git a/modules/local/vcfchrextract/meta.yml b/modules/local/vcfchrextract/meta.yml new file mode 100644 index 00000000..19d523d4 --- /dev/null +++ b/modules/local/vcfchrextract/meta.yml @@ -0,0 +1,41 @@ +name: vcfchrextract +description: Extract all contigs name into txt file +keywords: + - bcftools + - vcf + - head + - contig +tools: + - head: + description: Extract header from variant calling file. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#head + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - chr: + type: file + description: List of contigs in the VCF file + pattern: "*{txt}" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/local/vcfchrextract/tests/main.nf.test b/modules/local/vcfchrextract/tests/main.nf.test new file mode 100644 index 00000000..a004135b --- /dev/null +++ b/modules/local/vcfchrextract/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process VCFCHREXTRACT" + script "../main.nf" + process "VCFCHREXTRACT" + + tag "modules" + tag "modules_local" + tag "vcfchrextract" + + test("Extract chr from vcf") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/local/vcfchrextract/tests/main.nf.test.snap b/modules/local/vcfchrextract/tests/main.nf.test.snap new file mode 100644 index 00000000..3431bbe9 --- /dev/null +++ b/modules/local/vcfchrextract/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "Extract chr from vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt:md5,3a9ea6d336e113a74d7fdca5e7b623fc" + ] + ], + "1": [ + "versions.yml:md5,7e6d75a47df5ce3a975172dcd47fd247" + ], + "chr": [ + [ + { + "id": "test" + }, + "test.txt:md5,3a9ea6d336e113a74d7fdca5e7b623fc" + ] + ], + "versions": [ + "versions.yml:md5,7e6d75a47df5ce3a975172dcd47fd247" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-03-22T15:09:21.585363234" + } +} \ No newline at end of file diff --git a/modules/local/vcfchrextract/tests/tags.yml b/modules/local/vcfchrextract/tests/tags.yml new file mode 100644 index 00000000..429a601f --- /dev/null +++ b/modules/local/vcfchrextract/tests/tags.yml @@ -0,0 +1,2 @@ +vcfchrextract: + - "modules/local/vcfchrextract/**" diff --git a/nextflow.config b/nextflow.config index 882f501c..ba21f779 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,7 +22,7 @@ params { panel = null panel_index = null phased = null - panel_chr_rename = null + rename_chr = false // References genome = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 2e36b1c4..f3df5871 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -52,13 +52,6 @@ "description": "Is the reference panel phased", "type": "boolean", "pattern": "true|false" - }, - "panel_chr_rename": { - "type": "string", - "description": "Rename the chromosome of the panel", - "pattern": "^\\S+\\.(csv|tsv|txt)$", - "format": "file-path", - "mimetype": "text/csv" } } }, @@ -93,6 +86,11 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, + "rename_chr": { + "type": "boolean", + "description": "Should the vcf files be renamed to match the reference genome (e.g. 
'chr1' -> '1')", + "pattern": "true|false" + }, "email": { "type": "string", "description": "Email address for completion summary.", diff --git a/subworkflows/local/vcf_chr_rename/main.nf b/subworkflows/local/vcf_chr_rename/main.nf index 352f2bd1..6872e7b1 100644 --- a/subworkflows/local/vcf_chr_rename/main.nf +++ b/subworkflows/local/vcf_chr_rename/main.nf @@ -1,28 +1,70 @@ include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' +include { FAITOCHR } from '../../../modules/local/faitochr/main.nf' +include { VCFCHREXTRACT } from '../../../modules/local/vcfchrextract/main.nf' workflow VCF_CHR_RENAME { take: - ch_vcf // channel: [ [id, ref], vcf, csi ] - file_chr_rename // file + ch_vcf // channel: [ [id], vcf, index ] + ch_fasta // channel: [ [id], fasta, fai ] main: ch_versions = Channel.empty() - // Rename the chromosome without prefix - BCFTOOLS_ANNOTATE(ch_vcf - .combine(Channel.of([[], [], []])) - .combine(Channel.of(file_chr_rename)) - ) + // Get contig names from the VCF + VCFCHREXTRACT(ch_vcf.map{ metaV, vcf, csi -> [metaV, vcf] }) - VCF_INDEX(BCFTOOLS_ANNOTATE.out.vcf) - ch_versions = ch_versions.mix(VCF_INDEX.out.versions.first()) + // Check if the contig names are the same as the reference + chr_disjoint = VCFCHREXTRACT.out.chr + .combine(ch_vcf, by:0) + .combine(ch_fasta) + .map{metaI, chr, vcf, csi, metaG, fasta, fai -> + [ + metaI, vcf, csi, + chr.readLines()*.split(' ').collect{it[0]}, + fai.readLines()*.split('\t').collect{it[0]} + ] + } + .map { meta, vcf, csi, chr, fai -> + [meta, vcf, csi, (chr-fai).size()] + } + .branch{ + no_rename: it[3] == 0 + to_rename: it[3] > 0 + } - ch_vcf_rename = BCFTOOLS_ANNOTATE.out.vcf - .combine(VCF_INDEX.out.csi) + if (chr_disjoint.to_rename.ifEmpty(true) != true){ + if (params.rename_chr == true) { + println 
'Some contig names in the VCF do not match the reference genome. Renaming the contigs by adding / removing "chr" prefix ...' + // Generate the chromosome renaming file + FAITOCHR(ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }) + ch_versions = ch_versions.mix(FAITOCHR.out.versions) + + // Rename the chromosome without prefix + BCFTOOLS_ANNOTATE(chr_disjoint.to_rename.map{ meta, vcf, csi, chr -> [meta, vcf, csi] } + .combine(Channel.of([[], [], []])) + .combine(FAITOCHR.out.annot_chr.map{it[1]}) + ) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first()) + + BCFTOOLS_INDEX(BCFTOOLS_ANNOTATE.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions.first()) + + ch_vcf_renamed = BCFTOOLS_ANNOTATE.out.vcf + .combine(BCFTOOLS_INDEX.out.csi, by:0) + + ch_vcf_out = chr_disjoint.no_rename + .map{meta, vcf, csi, chr -> [meta, vcf, csi]} + .mix(ch_vcf_renamed) + } else { + error 'Some contig names in the VCF do not match the reference genome. Please set `rename_chr` to `true` to rename the contigs.' 
+ } + } else { + ch_vcf_out = ch_vcf + } emit: - vcf_rename = ch_vcf_rename // [ meta, vcf, csi ] + vcf = ch_vcf_out // [ meta, vcf, csi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_chr_rename/tests/main.nf.test b/subworkflows/local/vcf_chr_rename/tests/main.nf.test index d045d36a..2b93cb35 100644 --- a/subworkflows/local/vcf_chr_rename/tests/main.nf.test +++ b/subworkflows/local/vcf_chr_rename/tests/main.nf.test @@ -2,7 +2,6 @@ nextflow_workflow { name "Test Subworkflow VCF_CHR_RENAME" script "../main.nf" - config "./nextflow.config" workflow "VCF_CHR_RENAME" @@ -14,18 +13,30 @@ nextflow_workflow { tag "bcftools" tag "bcftools/annotate" tag "bcftools/index" + tag "faitochr" test("Should run without error") { - + config "./nextflow_rename.config" when { workflow { """ - input[0] = Channel.of([ - [id: "input", genome:"GRCh37"], - "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz", - "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi" + input[0] = Channel.fromList([ + [ + [id: "multi"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi",checkIfExist:true) + ], + [ + [id: "chr21"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true) + ] + ]) + input[1] = Channel.of([ + [id:"GRCh37"], + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa.fai",checkIfExist:true) ]) - input[1] = file("../../../assets/chr_rename_add.txt", exist: true) """ } } @@ -33,7 +44,39 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out.ch_region).match() } + { assert snapshot(workflow.out).match() } + ) + } + } + test("Should run with error") { + config "./nextflow.config" + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "chr22"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf.gz.tbi",checkIfExist:true) + ], + [ + [id: "chr21"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true) + ] + ]) + input[1] = Channel.of([ + [id:"GRCh37"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai",checkIfExist:true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, ) } } diff --git a/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap b/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap index 3aee8a9e..a9d24208 100644 --- 
a/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap @@ -1,10 +1,55 @@ { "Should run without error": { - "content": null, + "content": [ + { + "0": [ + [ + { + "id": "chr21" + }, + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi" + ], + [ + { + "id": "multi" + }, + "multi_chrrename.vcf.gz:md5,5f6f1ca261270d55eec054368f3d9587", + "multi_chrrename.vcf.gz.csi:md5,5d175780d5611d962430bff3377f649f" + ] + ], + "1": [ + "versions.yml:md5,176431a832f84d4c329f6d1e9c74d203", + "versions.yml:md5,3698013e288e15d392e1cd3e22d2022a", + "versions.yml:md5,924dd2e49d998f8f0da93799e62196f7" + ], + "vcf": [ + [ + { + "id": "chr21" + }, + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi" + ], + [ + { + "id": "multi" + }, + "multi_chrrename.vcf.gz:md5,5f6f1ca261270d55eec054368f3d9587", + "multi_chrrename.vcf.gz.csi:md5,5d175780d5611d962430bff3377f649f" + ] + ], + "versions": [ + "versions.yml:md5,176431a832f84d4c329f6d1e9c74d203", + "versions.yml:md5,3698013e288e15d392e1cd3e22d2022a", + "versions.yml:md5,924dd2e49d998f8f0da93799e62196f7" + ] + } + ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-13T12:47:49.775995" + "timestamp": "2024-03-22T19:14:31.190183685" } -} +} \ No newline at end of file diff --git a/subworkflows/local/vcf_chr_rename/tests/nextflow.config b/subworkflows/local/vcf_chr_rename/tests/nextflow.config index 227aed3d..8edef9d8 100644 --- a/subworkflows/local/vcf_chr_rename/tests/nextflow.config +++ b/subworkflows/local/vcf_chr_rename/tests/nextflow.config @@ -1,3 +1,4 @@ params { max_memory = '7.GB' + rename_chr = false } diff --git 
a/subworkflows/local/vcf_chr_rename/tests/nextflow_rename.config b/subworkflows/local/vcf_chr_rename/tests/nextflow_rename.config new file mode 100644 index 00000000..2abf982b --- /dev/null +++ b/subworkflows/local/vcf_chr_rename/tests/nextflow_rename.config @@ -0,0 +1,4 @@ +params { + max_memory = '7.GB' + rename_chr = true +} diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 1cde520b..bd57deca 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -81,16 +81,10 @@ workflow PHASEIMPUTE { // if (params.step == 'impute' || params.step == 'panel_prep') { // Remove if necessary "chr" - if (params.panel_chr_rename != null) { - print("Need to rename the chromosome prefix of the panel") - VCF_CHR_RENAME(ch_panel, params.panel_chr_rename) - ch_panel = VCF_CHR_RENAME.out.vcf_rename - } + //VCF_CHR_RENAME(ch_panel, ch_fasta) - if (ch_panel.map{it[3] == null}.any()) { - print("Need to compute the sites and tsv files for the panel") - GET_PANEL(ch_panel, ch_fasta) - } + // Prepare the panel + GET_PANEL(ch_panel, ch_fasta) ch_versions = ch_versions.mix(GET_PANEL.out.versions.first()) @@ -138,12 +132,10 @@ workflow PHASEIMPUTE { ch_impute_output = ch_impute_output.mix(output_glimpse1) } if (params.tools.contains("glimpse2")) { - print("Impute with Glimpse2") error "Glimpse2 not yet implemented" // Glimpse2 subworkflow } if (params.tools.contains("quilt")) { - print("Impute with quilt") error "Quilt not yet implemented" // Quilt subworkflow } @@ -153,10 +145,12 @@ workflow PHASEIMPUTE { } if (params.step == 'validate') { + print("Validate imputed data") error "validate step not yet implemented" } if (params.step == 'refine') { + print("Refine imputed data") error "refine step not yet implemented" } From 588d91e2323ad450938abfaa914d92c062e492a6 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 22 Mar 2024 19:20:02 +0100 Subject: [PATCH 02/18] Delete println --- workflows/phaseimpute/main.nf | 2 -- 1 file changed, 2 
deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index bd57deca..4f950bfb 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -145,12 +145,10 @@ workflow PHASEIMPUTE { } if (params.step == 'validate') { - print("Validate imputed data") error "validate step not yet implemented" } if (params.step == 'refine') { - print("Refine imputed data") error "refine step not yet implemented" } From 8865e7a2677f1d4ace1bb068ae3b69569a96a346 Mon Sep 17 00:00:00 2001 From: Louis Date: Fri, 22 Mar 2024 19:28:02 +0100 Subject: [PATCH 03/18] Remove trailing whitespace --- modules/local/vcfchrextract/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/vcfchrextract/main.nf b/modules/local/vcfchrextract/main.nf index ee58fe41..f6559930 100644 --- a/modules/local/vcfchrextract/main.nf +++ b/modules/local/vcfchrextract/main.nf @@ -25,7 +25,7 @@ process VCFCHREXTRACT { $input \\ \| grep -o -E '^##contig=<ID=([^,>]*)' | cut -d'=' -f3 \\ > ${prefix}.txt - + cat <<-END_VERSIONS > versions.yml "${task.process}": From af071f2f00cf8227a0dce8c8263e65a078574bdd Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 24 Mar 2024 14:22:17 +0100 Subject: [PATCH 04/18] Separate check to function, and renaming from check --- conf/modules.config | 2 +- subworkflows/local/vcf_chr_check/main.nf | 73 ++++++++++++ .../vcf_chr_check/tests/main.function.test | 0 .../local/vcf_chr_check/tests/main.nf.test | 110 ++++++++++++++++++ .../vcf_chr_check/tests/main.nf.test.snap | 51 ++++++++ .../local/vcf_chr_check/tests/nextflow.config | 4 + .../tests/nextflow_rename.config | 0 .../local/vcf_chr_check/tests/tags.yml | 2 + subworkflows/local/vcf_chr_rename/main.nf | 65 +++-------- .../local/vcf_chr_rename/tests/main.nf.test | 35 +----- .../vcf_chr_rename/tests/main.nf.test.snap | 12 +- .../vcf_chr_rename/tests/nextflow.config | 1 - workflows/phaseimpute/main.nf | 6 +- 13 files changed, 267 insertions(+), 94 deletions(-)
create mode 100644 subworkflows/local/vcf_chr_check/main.nf create mode 100644 subworkflows/local/vcf_chr_check/tests/main.function.test create mode 100644 subworkflows/local/vcf_chr_check/tests/main.nf.test create mode 100644 subworkflows/local/vcf_chr_check/tests/main.nf.test.snap create mode 100644 subworkflows/local/vcf_chr_check/tests/nextflow.config rename subworkflows/local/{vcf_chr_rename => vcf_chr_check}/tests/nextflow_rename.config (100%) create mode 100644 subworkflows/local/vcf_chr_check/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index 76b55b44..330744ca 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -56,7 +56,7 @@ process { ext.prefix = { "${meta.id}_${meta.region}" } } - withName: 'VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' { + withName: 'VCF_CHR_CHECK:VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' { ext.args = [ "-Oz", "--no-version" diff --git a/subworkflows/local/vcf_chr_check/main.nf b/subworkflows/local/vcf_chr_check/main.nf new file mode 100644 index 00000000..d7fc03d0 --- /dev/null +++ b/subworkflows/local/vcf_chr_check/main.nf @@ -0,0 +1,73 @@ +include { VCFCHREXTRACT as VCFCHRBFR } from '../../../modules/local/vcfchrextract/main.nf' +include { VCFCHREXTRACT as VCFCHRAFT } from '../../../modules/local/vcfchrextract/main.nf' +include { VCF_CHR_RENAME } from '../vcf_chr_rename/main.nf' + +workflow VCF_CHR_CHECK { + take: + ch_vcf // channel: [ [id], vcf, index ] + ch_fasta // channel: [ [id], fasta, fai ] + + main: + + ch_versions = Channel.empty() + + // Get contig names from the VCF + VCFCHRBFR(ch_vcf.map{ metaV, vcf, csi -> [metaV, vcf] }) + + // Check if the contig names are the same as the reference + chr_disjoint = check_chr(VCFCHRBFR.out.chr, ch_vcf, ch_fasta) + + if (params.rename_chr == true) { + // Generate the chromosome renaming file + VCF_CHR_RENAME( + chr_disjoint.to_rename.map{meta, vcf, index, nb -> [meta, vcf, index]}, + ch_fasta + ) + + // Check if modification has solved the problem + 
VCFCHRAFT(VCF_CHR_RENAME.out.vcf_renamed.map{ metaV, vcf, csi -> [metaV, vcf] }) + + chr_disjoint_after = check_chr(VCFCHRAFT.out.chr, VCF_CHR_RENAME.out.vcf_renamed, ch_fasta) + + chr_disjoint_after.to_rename.map{ + error 'Even after renaming errors are still present. Please check that contigs name in vcf and fasta file are equivalent.' + } + ch_vcf_renamed = VCF_CHR_RENAME.out.vcf_renamed + + } else { + chr_disjoint.to_rename.map { + error 'Some contig names in the VCF do not match the reference genome. Please set `rename_chr` to `true` to rename the contigs.' + } + ch_vcf_renamed = Channel.of([[],[],[]]) + } + + ch_vcf_out = chr_disjoint.no_rename + .map{meta, vcf, csi, chr -> [meta, vcf, csi]} + .mix(ch_vcf_renamed) + + emit: + vcf = ch_vcf_out // [ meta, vcf, csi ] + versions = ch_versions // channel: [ versions.yml ] +} + + +def check_chr(ch_chr, ch_vcf, ch_fasta){ + chr_checked = ch_chr + .combine(ch_vcf, by:0) + .combine(ch_fasta) + .map{metaI, chr, vcf, csi, metaG, fasta, fai -> + [ + metaI, vcf, csi, + chr.readLines()*.split(' ').collect{it[0]}, + fai.readLines()*.split('\t').collect{it[0]} + ] + } + .map { meta, vcf, csi, chr, fai -> + [meta, vcf, csi, (chr-fai).size()] + } + .branch{ + no_rename: it[3] == 0 + to_rename: it[3] > 0 + } + return chr_checked +} \ No newline at end of file diff --git a/subworkflows/local/vcf_chr_check/tests/main.function.test b/subworkflows/local/vcf_chr_check/tests/main.function.test new file mode 100644 index 00000000..e69de29b diff --git a/subworkflows/local/vcf_chr_check/tests/main.nf.test b/subworkflows/local/vcf_chr_check/tests/main.nf.test new file mode 100644 index 00000000..9b86e5fb --- /dev/null +++ b/subworkflows/local/vcf_chr_check/tests/main.nf.test @@ -0,0 +1,110 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_CHR_CHECK" + script "../main.nf" + + workflow "VCF_CHR_CHECK" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_chr_check" + tag "vcf_chr_check" + + tag "bcftools" + tag 
"bcftools/annotate" + tag "bcftools/index" + tag "faitochr" + + test("Should run without error") { + config "./nextflow_rename.config" + when { + workflow { + """ + fai_file = Channel.of('22\t10000\t7\t60\t61', '21\t10000\t7\t60\t61').collectFile(name: '21_22.fai', newLine: true) + input[0] = Channel.fromList([ + [ + [id: "chr22"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi",checkIfExist:true) + ], + [ + [id: "chr21"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true) + ] + ]) + input[1] = Channel.of([[id:"GRCh37"],[]]) + .combine(fai_file) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + test("Should run with error due to missing renaming params") { + config "./nextflow.config" + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "multi"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf.gz.tbi",checkIfExist:true) + ], + [ + [id: "chr21"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true) + ] + ]) + input[1] = Channel.of([ + [id:"GRCh37"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai",checkIfExist:true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorReport.contains("Some contig names in the VCF do not match the reference genome. Please set `rename_chr` to `true` to rename the contigs.")} + ) + } + } + test("Should run with error after renaming"){ + config "./nextflow_rename.config" + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "multi"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi",checkIfExist:true) + ] + ]) // Error due to multiple contigs name in header not present in fasta file + input[1] = Channel.of([ + [id:"GRCh37"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai",checkIfExist:true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorReport.contains("Even after renaming errors are still present. 
Please check that contigs name in vcf and fasta file are equivalent.")} + ) + } + } +} diff --git a/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap b/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap new file mode 100644 index 00000000..bcbc86bd --- /dev/null +++ b/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "Should run without error": { + "content": [ + { + "0": [ + [ + { + "id": "chr21" + }, + "chr21_chrrename.vcf.gz:md5,22785a5d7ec1132f766efae5f8e00adf", + "chr21_chrrename.vcf.gz.csi:md5,b5b5fd753ee54ebd3c8e4b1fe2261cdb" + ], + [ + { + "id": "chr22" + }, + "chr22_chrrename.vcf.gz:md5,23de9b4db1406415806e627969cec749", + "chr22_chrrename.vcf.gz.csi:md5,ba370ca13289fee4be59253a1f4609e2" + ] + ], + "1": [ + + ], + "vcf": [ + [ + { + "id": "chr21" + }, + "chr21_chrrename.vcf.gz:md5,22785a5d7ec1132f766efae5f8e00adf", + "chr21_chrrename.vcf.gz.csi:md5,b5b5fd753ee54ebd3c8e4b1fe2261cdb" + ], + [ + { + "id": "chr22" + }, + "chr22_chrrename.vcf.gz:md5,23de9b4db1406415806e627969cec749", + "chr22_chrrename.vcf.gz.csi:md5,ba370ca13289fee4be59253a1f4609e2" + ] + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-24T14:19:22.618820686" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_chr_check/tests/nextflow.config b/subworkflows/local/vcf_chr_check/tests/nextflow.config new file mode 100644 index 00000000..8edef9d8 --- /dev/null +++ b/subworkflows/local/vcf_chr_check/tests/nextflow.config @@ -0,0 +1,4 @@ +params { + max_memory = '7.GB' + rename_chr = false +} diff --git a/subworkflows/local/vcf_chr_rename/tests/nextflow_rename.config b/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config similarity index 100% rename from subworkflows/local/vcf_chr_rename/tests/nextflow_rename.config rename to subworkflows/local/vcf_chr_check/tests/nextflow_rename.config diff --git a/subworkflows/local/vcf_chr_check/tests/tags.yml 
b/subworkflows/local/vcf_chr_check/tests/tags.yml new file mode 100644 index 00000000..d090629e --- /dev/null +++ b/subworkflows/local/vcf_chr_check/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_chr_check: + - subworkflows/local/vcf_chr_check/** diff --git a/subworkflows/local/vcf_chr_rename/main.nf b/subworkflows/local/vcf_chr_rename/main.nf index 6872e7b1..43b30cba 100644 --- a/subworkflows/local/vcf_chr_rename/main.nf +++ b/subworkflows/local/vcf_chr_rename/main.nf @@ -1,7 +1,6 @@ include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main.nf' include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' include { FAITOCHR } from '../../../modules/local/faitochr/main.nf' -include { VCFCHREXTRACT } from '../../../modules/local/vcfchrextract/main.nf' workflow VCF_CHR_RENAME { take: @@ -12,59 +11,25 @@ workflow VCF_CHR_RENAME { ch_versions = Channel.empty() - // Get contig names from the VCF - VCFCHREXTRACT(ch_vcf.map{ metaV, vcf, csi -> [metaV, vcf] }) + // Generate the chromosome renaming file + FAITOCHR(ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }) + ch_versions = ch_versions.mix(FAITOCHR.out.versions) - // Check if the contig names are the same as the reference - chr_disjoint = VCFCHREXTRACT.out.chr - .combine(ch_vcf, by:0) - .combine(ch_fasta) - .map{metaI, chr, vcf, csi, metaG, fasta, fai -> - [ - metaI, vcf, csi, - chr.readLines()*.split(' ').collect{it[0]}, - fai.readLines()*.split('\t').collect{it[0]} - ] - } - .map { meta, vcf, csi, chr, fai -> - [meta, vcf, csi, (chr-fai).size()] - } - .branch{ - no_rename: it[3] == 0 - to_rename: it[3] > 0 - } + // Rename the chromosome without prefix + BCFTOOLS_ANNOTATE( + ch_vcf // channel: [ [id], vcf, index ] + .combine(Channel.of([[],[],[]])) + .combine(FAITOCHR.out.annot_chr.map{it[1]}) + ) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first()) - if (chr_disjoint.to_rename.ifEmpty(true) != true){ - if (params.rename_chr == true) { - println 'Some 
contig names in the VCF do not match the reference genome. Renaming the contigs by adding / removing "chr" prefix ...' - // Generate the chromosome renaming file - FAITOCHR(ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }) - ch_versions = ch_versions.mix(FAITOCHR.out.versions) + BCFTOOLS_INDEX(BCFTOOLS_ANNOTATE.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions.first()) - // Rename the chromosome without prefix - BCFTOOLS_ANNOTATE(chr_disjoint.to_rename.map{ meta, vcf, csi, chr -> [meta, vcf, csi] } - .combine(Channel.of([[], [], []])) - .combine(FAITOCHR.out.annot_chr.map{it[1]}) - ) - ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first()) - - BCFTOOLS_INDEX(BCFTOOLS_ANNOTATE.out.vcf) - ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions.first()) - - ch_vcf_renamed = BCFTOOLS_ANNOTATE.out.vcf - .combine(BCFTOOLS_INDEX.out.csi, by:0) - - ch_vcf_out = chr_disjoint.no_rename - .map{meta, vcf, csi, chr -> [meta, vcf, csi]} - .mix(ch_vcf_renamed) - } else { - error 'Some contig names in the VCF do not match the reference genome. Please set `rename_chr` to `true` to rename the contigs.' 
- } - } else { - ch_vcf_out = ch_vcf - } + ch_vcf_renamed = BCFTOOLS_ANNOTATE.out.vcf + .combine(BCFTOOLS_INDEX.out.csi, by:0) emit: - vcf = ch_vcf_out // [ meta, vcf, csi ] + vcf_renamed = ch_vcf_renamed // [ meta, vcf, csi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_chr_rename/tests/main.nf.test b/subworkflows/local/vcf_chr_rename/tests/main.nf.test index 2b93cb35..9ccd01b0 100644 --- a/subworkflows/local/vcf_chr_rename/tests/main.nf.test +++ b/subworkflows/local/vcf_chr_rename/tests/main.nf.test @@ -3,6 +3,8 @@ nextflow_workflow { name "Test Subworkflow VCF_CHR_RENAME" script "../main.nf" + config "./nextflow.config" + workflow "VCF_CHR_RENAME" tag "subworkflows" @@ -16,7 +18,6 @@ nextflow_workflow { tag "faitochr" test("Should run without error") { - config "./nextflow_rename.config" when { workflow { """ @@ -48,36 +49,4 @@ nextflow_workflow { ) } } - test("Should run with error") { - config "./nextflow.config" - when { - workflow { - """ - input[0] = Channel.fromList([ - [ - [id: "chr22"], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf.gz",checkIfExist:true), - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf.gz.tbi",checkIfExist:true) - ], - [ - [id: "chr21"], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true) - ] - ]) - input[1] = Channel.of([ - [id:"GRCh37"], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta",checkIfExist:true), - 
file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai",checkIfExist:true) - ]) - """ - } - } - - then { - assertAll( - { assert workflow.failed }, - ) - } - } } diff --git a/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap b/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap index a9d24208..60ce4bdb 100644 --- a/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap @@ -7,8 +7,8 @@ { "id": "chr21" }, - "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz", - "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi" + "chr21_chrrename.vcf.gz:md5,39cd8e316cd9b9282b8289d69d81260b", + "chr21_chrrename.vcf.gz.csi:md5,3bbbb50b0dd3515d380eabe0013cde19" ], [ { @@ -23,13 +23,13 @@ "versions.yml:md5,3698013e288e15d392e1cd3e22d2022a", "versions.yml:md5,924dd2e49d998f8f0da93799e62196f7" ], - "vcf": [ + "vcf_renamed": [ [ { "id": "chr21" }, - "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz", - "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi" + "chr21_chrrename.vcf.gz:md5,39cd8e316cd9b9282b8289d69d81260b", + "chr21_chrrename.vcf.gz.csi:md5,3bbbb50b0dd3515d380eabe0013cde19" ], [ { @@ -50,6 +50,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-22T19:14:31.190183685" + "timestamp": "2024-03-24T11:45:51.636035695" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_chr_rename/tests/nextflow.config b/subworkflows/local/vcf_chr_rename/tests/nextflow.config index 8edef9d8..227aed3d 100644 --- a/subworkflows/local/vcf_chr_rename/tests/nextflow.config +++ b/subworkflows/local/vcf_chr_rename/tests/nextflow.config @@ -1,4 +1,3 @@ params { max_memory = '7.GB' - rename_chr = false } diff --git 
a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 4f950bfb..b86f183f 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -22,7 +22,7 @@ include { BAM_DOWNSAMPLE } from '../../subworkflows/local/bam_downs include { COMPUTE_GL as GL_TRUTH } from '../../subworkflows/local/compute_gl' include { COMPUTE_GL as GL_INPUT } from '../../subworkflows/local/compute_gl' include { VCF_IMPUTE_GLIMPSE } from '../../subworkflows/nf-core/vcf_impute_glimpse' -include { VCF_CHR_RENAME } from '../../subworkflows/local/vcf_chr_rename' +include { VCF_CHR_CHECK } from '../../subworkflows/local/vcf_chr_check' include { GET_PANEL } from '../../subworkflows/local/get_panel' /* @@ -81,10 +81,10 @@ workflow PHASEIMPUTE { // if (params.step == 'impute' || params.step == 'panel_prep') { // Remove if necessary "chr" - //VCF_CHR_RENAME(ch_panel, ch_fasta) + VCF_CHR_CHECK(ch_panel, ch_fasta) // Prepare the panel - GET_PANEL(ch_panel, ch_fasta) + GET_PANEL(VCF_CHR_CHECK.out.vcf, ch_fasta) ch_versions = ch_versions.mix(GET_PANEL.out.versions.first()) From 2861e7bc805c361c44572f6d560a765ff27e8dc7 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 24 Mar 2024 14:24:04 +0100 Subject: [PATCH 05/18] Update changelog --- docs/development.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/development.md b/docs/development.md index 8126332d..79483c57 100644 --- a/docs/development.md +++ b/docs/development.md @@ -2,7 +2,7 @@ ## Features and tasks -- [] Add automatic detection of chromosome name to create a renaming file for the vcf +- [x] Add automatic detection of chromosome name to create a renaming file for the vcf - [] Make the different tests workflows work - [] Simulation - [] Validation From 170c799c72c48e61b0b32a585697d51d6f54487d Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 24 Mar 2024 15:00:05 +0100 Subject: [PATCH 06/18] Update grep version for vcfchrextract Fix with empty string Add versions --- CHANGELOG.md | 1 + 
modules/local/vcfchrextract/main.nf | 4 ++-- subworkflows/local/vcf_chr_check/main.nf | 5 ++++- workflows/phaseimpute/main.nf | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb015936..c063019b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - correct meta map propagation - Test impute and test sim works - [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1/2 and shapeit5) +- [#20](https://github.com/nf-core/phaseimpute/pull/20) - Added automatic detection of vcf contigs and automatic renaming available ### `Fixed` diff --git a/modules/local/vcfchrextract/main.nf b/modules/local/vcfchrextract/main.nf index f6559930..b458bb0e 100644 --- a/modules/local/vcfchrextract/main.nf +++ b/modules/local/vcfchrextract/main.nf @@ -30,7 +30,7 @@ process VCFCHREXTRACT { cat <<-END_VERSIONS > versions.yml "${task.process}": bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) - grep: \$( grep --version |& sed '1!d; s/^.*grep (GNU grep) //' ) + grep: \$( grep --help |& grep -o -E '[0-9]+\\.[0-9]+\\.[0-9]+' ) END_VERSIONS """ @@ -43,7 +43,7 @@ process VCFCHREXTRACT { cat <<-END_VERSIONS > versions.yml "${task.process}": bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) - grep: \$( grep --version |& sed '1!d; s/^.*grep (GNU grep) //' ) + grep: \$( grep --help |& grep -o -E '[0-9]+\\.[0-9]+\\.[0-9]+' ) END_VERSIONS """ } diff --git a/subworkflows/local/vcf_chr_check/main.nf b/subworkflows/local/vcf_chr_check/main.nf index d7fc03d0..93d34c62 100644 --- a/subworkflows/local/vcf_chr_check/main.nf +++ b/subworkflows/local/vcf_chr_check/main.nf @@ -13,6 +13,7 @@ workflow VCF_CHR_CHECK { // Get contig names from the VCF VCFCHRBFR(ch_vcf.map{ metaV, vcf, csi -> [metaV, vcf] }) + ch_versions = 
ch_versions.mix(VCFCHRBFR.out.versions.first()) // Check if the contig names are the same as the reference chr_disjoint = check_chr(VCFCHRBFR.out.chr, ch_vcf, ch_fasta) @@ -23,9 +24,11 @@ workflow VCF_CHR_CHECK { chr_disjoint.to_rename.map{meta, vcf, index, nb -> [meta, vcf, index]}, ch_fasta ) + ch_versions = ch_versions.mix(VCF_CHR_RENAME.out.versions.first()) // Check if modification has solved the problem VCFCHRAFT(VCF_CHR_RENAME.out.vcf_renamed.map{ metaV, vcf, csi -> [metaV, vcf] }) + ch_versions = ch_versions.mix(VCFCHRAFT.out.versions.first()) chr_disjoint_after = check_chr(VCFCHRAFT.out.chr, VCF_CHR_RENAME.out.vcf_renamed, ch_fasta) @@ -38,7 +41,7 @@ workflow VCF_CHR_CHECK { chr_disjoint.to_rename.map { error 'Some contig names in the VCF do not match the reference genome. Please set `rename_chr` to `true` to rename the contigs.' } - ch_vcf_renamed = Channel.of([[],[],[]]) + ch_vcf_renamed = Channel.empty() } ch_vcf_out = chr_disjoint.no_rename diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index b86f183f..8535f860 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -82,10 +82,10 @@ workflow PHASEIMPUTE { if (params.step == 'impute' || params.step == 'panel_prep') { // Remove if necessary "chr" VCF_CHR_CHECK(ch_panel, ch_fasta) + ch_versions = ch_versions.mix(VCF_CHR_CHECK.out.versions.first()) // Prepare the panel GET_PANEL(VCF_CHR_CHECK.out.vcf, ch_fasta) - ch_versions = ch_versions.mix(GET_PANEL.out.versions.first()) // Output channel of input process From a8b9fad8a089b9dd5140a8db6caf00e9495b31b0 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 24 Mar 2024 15:08:50 +0100 Subject: [PATCH 07/18] Delete unnecessary spaces --- subworkflows/local/vcf_chr_check/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/vcf_chr_check/main.nf b/subworkflows/local/vcf_chr_check/main.nf index 93d34c62..6c930e25 100644 --- a/subworkflows/local/vcf_chr_check/main.nf
+++ b/subworkflows/local/vcf_chr_check/main.nf @@ -42,7 +42,7 @@ workflow VCF_CHR_CHECK { error 'Some contig names in the VCF do not match the reference genome. Please set `rename_chr` to `true` to rename the contigs.' } ch_vcf_renamed = Channel.empty() - } + } ch_vcf_out = chr_disjoint.no_rename .map{meta, vcf, csi, chr -> [meta, vcf, csi]} @@ -73,4 +73,4 @@ def check_chr(ch_chr, ch_vcf, ch_fasta){ to_rename: it[3] > 0 } return chr_checked -} \ No newline at end of file +} From 823e91b51b2060b9c68146fd4dd03f905180e2b7 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Sun, 24 Mar 2024 15:12:05 +0100 Subject: [PATCH 08/18] Fix linting eclint --- subworkflows/local/vcf_chr_check/main.nf | 152 +++++++++--------- .../vcf_chr_check/tests/main.function.test | 1 + .../vcf_chr_check/tests/main.nf.test.snap | 6 +- 3 files changed, 80 insertions(+), 79 deletions(-) diff --git a/subworkflows/local/vcf_chr_check/main.nf b/subworkflows/local/vcf_chr_check/main.nf index 6c930e25..db5960b8 100644 --- a/subworkflows/local/vcf_chr_check/main.nf +++ b/subworkflows/local/vcf_chr_check/main.nf @@ -1,76 +1,76 @@ -include { VCFCHREXTRACT as VCFCHRBFR } from '../../../modules/local/vcfchrextract/main.nf' -include { VCFCHREXTRACT as VCFCHRAFT } from '../../../modules/local/vcfchrextract/main.nf' -include { VCF_CHR_RENAME } from '../vcf_chr_rename/main.nf' - -workflow VCF_CHR_CHECK { - take: - ch_vcf // channel: [ [id], vcf, index ] - ch_fasta // channel: [ [id], fasta, fai ] - - main: - - ch_versions = Channel.empty() - - // Get contig names from the VCF - VCFCHRBFR(ch_vcf.map{ metaV, vcf, csi -> [metaV, vcf] }) - ch_versions = ch_versions.mix(VCFCHRBFR.out.versions.first()) - - // Check if the contig names are the same as the reference - chr_disjoint = check_chr(VCFCHRBFR.out.chr, ch_vcf, ch_fasta) - - if (params.rename_chr == true) { - // Generate the chromosome renaming file - VCF_CHR_RENAME( - chr_disjoint.to_rename.map{meta, vcf, index, nb -> [meta, vcf, index]}, - ch_fasta - ) - 
ch_versions = ch_versions.mix(VCF_CHR_RENAME.out.versions.first()) - - // Check if modification has solved the problem - VCFCHRAFT(VCF_CHR_RENAME.out.vcf_renamed.map{ metaV, vcf, csi -> [metaV, vcf] }) - ch_versions = ch_versions.mix(VCFCHRAFT.out.versions.first()) - - chr_disjoint_after = check_chr(VCFCHRAFT.out.chr, VCF_CHR_RENAME.out.vcf_renamed, ch_fasta) - - chr_disjoint_after.to_rename.map{ - error 'Even after renaming errors are still present. Please check that contigs name in vcf and fasta file are equivalent.' - } - ch_vcf_renamed = VCF_CHR_RENAME.out.vcf_renamed - - } else { - chr_disjoint.to_rename.map { - error 'Some contig names in the VCF do not match the reference genome. Please set `rename_chr` to `true` to rename the contigs.' - } - ch_vcf_renamed = Channel.empty() - } - - ch_vcf_out = chr_disjoint.no_rename - .map{meta, vcf, csi, chr -> [meta, vcf, csi]} - .mix(ch_vcf_renamed) - - emit: - vcf = ch_vcf_out // [ meta, vcf, csi ] - versions = ch_versions // channel: [ versions.yml ] -} - - -def check_chr(ch_chr, ch_vcf, ch_fasta){ - chr_checked = ch_chr - .combine(ch_vcf, by:0) - .combine(ch_fasta) - .map{metaI, chr, vcf, csi, metaG, fasta, fai -> - [ - metaI, vcf, csi, - chr.readLines()*.split(' ').collect{it[0]}, - fai.readLines()*.split('\t').collect{it[0]} - ] - } - .map { meta, vcf, csi, chr, fai -> - [meta, vcf, csi, (chr-fai).size()] - } - .branch{ - no_rename: it[3] == 0 - to_rename: it[3] > 0 - } - return chr_checked -} +include { VCFCHREXTRACT as VCFCHRBFR } from '../../../modules/local/vcfchrextract/main.nf' +include { VCFCHREXTRACT as VCFCHRAFT } from '../../../modules/local/vcfchrextract/main.nf' +include { VCF_CHR_RENAME } from '../vcf_chr_rename/main.nf' + +workflow VCF_CHR_CHECK { + take: + ch_vcf // channel: [ [id], vcf, index ] + ch_fasta // channel: [ [id], fasta, fai ] + + main: + + ch_versions = Channel.empty() + + // Get contig names from the VCF + VCFCHRBFR(ch_vcf.map{ metaV, vcf, csi -> [metaV, vcf] }) + ch_versions = 
ch_versions.mix(VCFCHRBFR.out.versions.first()) + + // Check if the contig names are the same as the reference + chr_disjoint = check_chr(VCFCHRBFR.out.chr, ch_vcf, ch_fasta) + + if (params.rename_chr == true) { + // Generate the chromosome renaming file + VCF_CHR_RENAME( + chr_disjoint.to_rename.map{meta, vcf, index, nb -> [meta, vcf, index]}, + ch_fasta + ) + ch_versions = ch_versions.mix(VCF_CHR_RENAME.out.versions.first()) + + // Check if modification has solved the problem + VCFCHRAFT(VCF_CHR_RENAME.out.vcf_renamed.map{ metaV, vcf, csi -> [metaV, vcf] }) + ch_versions = ch_versions.mix(VCFCHRAFT.out.versions.first()) + + chr_disjoint_after = check_chr(VCFCHRAFT.out.chr, VCF_CHR_RENAME.out.vcf_renamed, ch_fasta) + + chr_disjoint_after.to_rename.map{ + error 'Even after renaming errors are still present. Please check that contigs name in vcf and fasta file are equivalent.' + } + ch_vcf_renamed = VCF_CHR_RENAME.out.vcf_renamed + + } else { + chr_disjoint.to_rename.map { + error 'Some contig names in the VCF do not match the reference genome. Please set `rename_chr` to `true` to rename the contigs.' 
+ } + ch_vcf_renamed = Channel.empty() + } + + ch_vcf_out = chr_disjoint.no_rename + .map{meta, vcf, csi, chr -> [meta, vcf, csi]} + .mix(ch_vcf_renamed) + + emit: + vcf = ch_vcf_out // [ meta, vcf, csi ] + versions = ch_versions // channel: [ versions.yml ] +} + + +def check_chr(ch_chr, ch_vcf, ch_fasta){ + chr_checked = ch_chr + .combine(ch_vcf, by:0) + .combine(ch_fasta) + .map{metaI, chr, vcf, csi, metaG, fasta, fai -> + [ + metaI, vcf, csi, + chr.readLines()*.split(' ').collect{it[0]}, + fai.readLines()*.split('\t').collect{it[0]} + ] + } + .map { meta, vcf, csi, chr, fai -> + [meta, vcf, csi, (chr-fai).size()] + } + .branch{ + no_rename: it[3] == 0 + to_rename: it[3] > 0 + } + return chr_checked +} diff --git a/subworkflows/local/vcf_chr_check/tests/main.function.test b/subworkflows/local/vcf_chr_check/tests/main.function.test index e69de29b..8b137891 100644 --- a/subworkflows/local/vcf_chr_check/tests/main.function.test +++ b/subworkflows/local/vcf_chr_check/tests/main.function.test @@ -0,0 +1 @@ + diff --git a/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap b/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap index bcbc86bd..daf406f5 100644 --- a/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap @@ -19,7 +19,7 @@ ] ], "1": [ - + ], "vcf": [ [ @@ -38,7 +38,7 @@ ] ], "versions": [ - + ] } ], @@ -48,4 +48,4 @@ }, "timestamp": "2024-03-24T14:19:22.618820686" } -} \ No newline at end of file +} From 4a4e73d8013d317bec8dfff2acc3b5ab2b501676 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 25 Mar 2024 20:56:56 +0100 Subject: [PATCH 09/18] Add more test to vcf_chr tests --- docs/development.md | 3 +- modules/local/faitochr/main.nf | 4 +- .../vcf_chr_check/tests/main.function.test | 1 - .../local/vcf_chr_check/tests/main.nf.test | 76 ++++++++++++- .../vcf_chr_check/tests/main.nf.test.snap | 104 +++++++++++++++++- 5 files changed, 173 insertions(+), 15 deletions(-) delete mode 
100644 subworkflows/local/vcf_chr_check/tests/main.function.test diff --git a/docs/development.md b/docs/development.md index 79483c57..5258ca73 100644 --- a/docs/development.md +++ b/docs/development.md @@ -2,7 +2,8 @@ ## Features and tasks -- [x] Add automatic detection of chromosome name to create a renaming file for the vcf +- [x] Add automatic detection of chromosome name to create a renaming file for the vcf files +- [] Add automatic detection of chromosome name to create a renaming file for the bam files - [] Make the different tests workflows work - [] Simulation - [] Validation diff --git a/modules/local/faitochr/main.nf b/modules/local/faitochr/main.nf index 6c81bb6e..7e2baa60 100644 --- a/modules/local/faitochr/main.nf +++ b/modules/local/faitochr/main.nf @@ -27,11 +27,11 @@ process FAITOCHR { # Take the fai file and add/remove the chr prefix to the chromosome names # Keep only first column, remove chr prefix if present, add chr prefix if needed - # chr prefix is added only on number only chromosome names + # chr prefix is added only on number only chromosome names or XYMT awk -F'\t' '{print \$1}' ${fai} | \ sed 's/^chr//g' | \ awk -v col1=\${col1} -v col2=\${col2} \ - 'BEGIN {OFS=" "} {if (\$1 ~ /^[0-9]+\$/) print col1\$1, col2\$1; else print \$1, \$1}' \ + 'BEGIN {OFS=" "} {if (\$1 ~ /^[0-9]+|[XYMT]\$/) print col1\$1, col2\$1; else print \$1, \$1}' \ > ${prefix}.txt # We should have a file with the chromosome names in the second column corresponding to the fai format diff --git a/subworkflows/local/vcf_chr_check/tests/main.function.test b/subworkflows/local/vcf_chr_check/tests/main.function.test deleted file mode 100644 index 8b137891..00000000 --- a/subworkflows/local/vcf_chr_check/tests/main.function.test +++ /dev/null @@ -1 +0,0 @@ - diff --git a/subworkflows/local/vcf_chr_check/tests/main.nf.test b/subworkflows/local/vcf_chr_check/tests/main.nf.test index 9b86e5fb..1375e9dc 100644 --- a/subworkflows/local/vcf_chr_check/tests/main.nf.test +++ 
b/subworkflows/local/vcf_chr_check/tests/main.nf.test @@ -15,7 +15,39 @@ nextflow_workflow { tag "bcftools/index" tag "faitochr" - test("Should run without error") { + test("Rename: panel chr + fasta chr") { + config "./nextflow_rename.config" + when { + workflow { + """ + fai_file = Channel.of('chr22\t10000\t7\t60\t61', 'chr21\t10000\t7\t60\t61').collectFile(name: 'chr21_22.fai', newLine: true) + input[0] = Channel.fromList([ + [ + [id: "chr22"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi",checkIfExist:true) + ], + [ + [id: "chr21"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true) + ] + ]) + input[1] = Channel.of([[id:"GRCh37"],[]]) + .combine(fai_file) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("Rename: panel chr + fasta no chr") { config "./nextflow_rename.config" when { workflow { @@ -46,7 +78,41 @@ nextflow_workflow { ) } } - test("Should run with error due to missing renaming params") { + + test("Rename: panel no chr + fasta chr") { + config "./nextflow_rename.config" + when { + workflow { + """ + fai_file = Channel.of( + 'chr1\t10000\t7\t60\t61','chr2\t10000\t7\t60\t61','chr3\t10000\t7\t60\t61','chr4\t10000\t7\t60\t61','chr5\t10000\t7\t60\t61','chr6\t10000\t7\t60\t61', + 'chr7\t10000\t7\t60\t61','chr8\t10000\t7\t60\t61','chr9\t10000\t7\t60\t61','chr10\t10000\t7\t60\t61','chr11\t10000\t7\t60\t61','chr12\t10000\t7\t60\t61', + 
'chr13\t10000\t7\t60\t61','chr14\t10000\t7\t60\t61','chr15\t10000\t7\t60\t61','chr16\t10000\t7\t60\t61','chr17\t10000\t7\t60\t61','chr18\t10000\t7\t60\t61', + 'chr19\t10000\t7\t60\t61','chr20\t10000\t7\t60\t61','chr21\t10000\t7\t60\t61','chr22\t10000\t7\t60\t61', + 'chrX\t10000\t7\t60\t61','chrY\t10000\t7\t60\t61', 'chrMT\t10000\t7\t60\t61' + ).collectFile(name: 'chr.fai', newLine: true) + input[0] = Channel.fromList([ + [ + [id: "22"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test_models.vcf.gz",checkIfExist:true), + [] + ] + ]) + input[1] = Channel.of([[id:"GRCh37"],[]]) + .combine(fai_file) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("Error : missing renaming params") { config "./nextflow.config" when { workflow { @@ -63,9 +129,7 @@ nextflow_workflow { file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true) ] ]) - input[1] = Channel.of([ - [id:"GRCh37"], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta",checkIfExist:true), + input[1] = Channel.of([[id:"GRCh37"],[], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai",checkIfExist:true) ]) """ @@ -79,7 +143,7 @@ nextflow_workflow { ) } } - test("Should run with error after renaming"){ + test("Error : still difference after renaming"){ config "./nextflow_rename.config" when { workflow { diff --git a/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap b/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap index daf406f5..b7077fc4 100644 --- a/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap @@ -1,5 +1,95 @@ { - "Should run without error": { 
+ "Rename: panel chr + fasta chr": { + "content": [ + { + "0": [ + [ + { + "id": "chr21" + }, + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi" + ], + [ + { + "id": "chr22" + }, + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" + ] + ], + "1": [ + "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", + "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1" + ], + "vcf": [ + [ + { + "id": "chr21" + }, + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi" + ], + [ + { + "id": "chr22" + }, + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" + ] + ], + "versions": [ + "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", + "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-25T20:52:06.173493472" + }, + "Rename: panel no chr + fasta chr": { + "content": [ + { + "0": [ + [ + { + "id": "22" + }, + "22_chrrename.vcf.gz:md5,070a96d1053a64f2de2132ee8800847c", + "22_chrrename.vcf.gz.csi:md5,e190b690b4b0a4d088231862e5408582" + ] + ], + "1": [ + "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", + "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1", + "versions.yml:md5,e576f40503c3506c782228485d06fbf1" + ], + "vcf": [ + [ + { + "id": "22" + }, + "22_chrrename.vcf.gz:md5,070a96d1053a64f2de2132ee8800847c", + "22_chrrename.vcf.gz.csi:md5,e190b690b4b0a4d088231862e5408582" + ] + ], + "versions": [ + 
"versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", + "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1", + "versions.yml:md5,e576f40503c3506c782228485d06fbf1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-25T20:50:17.522495093" + }, + "Rename: panel chr + fasta no chr": { "content": [ { "0": [ @@ -19,7 +109,9 @@ ] ], "1": [ - + "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", + "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1", + "versions.yml:md5,e576f40503c3506c782228485d06fbf1" ], "vcf": [ [ @@ -38,7 +130,9 @@ ] ], "versions": [ - + "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", + "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1", + "versions.yml:md5,e576f40503c3506c782228485d06fbf1" ] } ], @@ -46,6 +140,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-24T14:19:22.618820686" + "timestamp": "2024-03-25T20:50:05.571457009" } -} +} \ No newline at end of file From ac1a2d6a393b5b3d1f59461213bd2954403a4d2f Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 27 Mar 2024 10:27:36 +0100 Subject: [PATCH 10/18] Update default compression of annotate and shema extension of panel index --- assets/schema_input_panel.json | 2 +- conf/modules.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/schema_input_panel.json b/assets/schema_input_panel.json index c1107f5e..e0efbcc2 100644 --- a/assets/schema_input_panel.json +++ b/assets/schema_input_panel.json @@ -26,7 +26,7 @@ }, "index": { "type": "string", - "pattern": "^\\S+\\.(vcf|bcf)\\.(tbi|csi)$", + "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?\\.(tbi|csi)$", "errorMessage": "Panel index file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with '.csi' or '.tbi' extension" } }, diff --git a/conf/modules.config b/conf/modules.config index 330744ca..6664154e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -56,7 +56,7 @@ process { ext.prefix = { 
"${meta.id}_${meta.region}" } } - withName: 'VCF_CHR_CHECK:VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHR_CHECK:VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' { ext.args = [ "-Oz", "--no-version" From 705487b76667b7a348028f89db3466d833019d05 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 27 Mar 2024 14:09:29 +0100 Subject: [PATCH 11/18] Update config files for nf-test --- subworkflows/local/vcf_chr_check/tests/nextflow.config | 10 ++++++++++ .../local/vcf_chr_check/tests/nextflow_rename.config | 10 ++++++++++ .../local/vcf_chr_rename/tests/nextflow.config | 10 ++++++++++ 3 files changed, 30 insertions(+) diff --git a/subworkflows/local/vcf_chr_check/tests/nextflow.config b/subworkflows/local/vcf_chr_check/tests/nextflow.config index 8edef9d8..ff02f295 100644 --- a/subworkflows/local/vcf_chr_check/tests/nextflow.config +++ b/subworkflows/local/vcf_chr_check/tests/nextflow.config @@ -2,3 +2,13 @@ params { max_memory = '7.GB' rename_chr = false } + +process { + withName: BCFTOOLS_ANNOTATE { + ext.args = [ + "-Oz", + "--no-version" + ].join(' ') + ext.prefix = { "${meta.id}_chrrename" } + } +} diff --git a/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config b/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config index 2abf982b..1f3970da 100644 --- a/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config +++ b/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config @@ -2,3 +2,13 @@ params { max_memory = '7.GB' rename_chr = true } + +process { + withName: BCFTOOLS_ANNOTATE { + ext.args = [ + "-Oz", + "--no-version" + ].join(' ') + ext.prefix = { "${meta.id}_chrrename" } + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_chr_rename/tests/nextflow.config b/subworkflows/local/vcf_chr_rename/tests/nextflow.config index 227aed3d..1e587d1e 100644 --- a/subworkflows/local/vcf_chr_rename/tests/nextflow.config +++ b/subworkflows/local/vcf_chr_rename/tests/nextflow.config @@ -1,3 +1,13 @@ params 
{ max_memory = '7.GB' } + +process { + withName: BCFTOOLS_ANNOTATE { + ext.args = [ + "-Oz", + "--no-version" + ].join(' ') + ext.prefix = { "${meta.id}_chrrename" } + } +} \ No newline at end of file From e92d47911e2e239eb41d868f4d93f47fd18b588f Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 27 Mar 2024 14:10:12 +0100 Subject: [PATCH 12/18] Add end new line to config files --- subworkflows/local/vcf_chr_check/tests/nextflow_rename.config | 2 +- subworkflows/local/vcf_chr_rename/tests/nextflow.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config b/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config index 1f3970da..d048cbcb 100644 --- a/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config +++ b/subworkflows/local/vcf_chr_check/tests/nextflow_rename.config @@ -11,4 +11,4 @@ process { ].join(' ') ext.prefix = { "${meta.id}_chrrename" } } -} \ No newline at end of file +} diff --git a/subworkflows/local/vcf_chr_rename/tests/nextflow.config b/subworkflows/local/vcf_chr_rename/tests/nextflow.config index 1e587d1e..cf2f7a63 100644 --- a/subworkflows/local/vcf_chr_rename/tests/nextflow.config +++ b/subworkflows/local/vcf_chr_rename/tests/nextflow.config @@ -10,4 +10,4 @@ process { ].join(' ') ext.prefix = { "${meta.id}_chrrename" } } -} \ No newline at end of file +} From 1d640031ce5cc6614b9f14a2777478981067d202 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 27 Mar 2024 16:49:10 +0100 Subject: [PATCH 13/18] Add environment for fai to chr --- modules/local/faitochr/environment.yml | 7 +++++++ modules/local/faitochr/main.nf | 5 +++++ modules/local/vcfchrextract/environment.yml | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 modules/local/faitochr/environment.yml diff --git a/modules/local/faitochr/environment.yml b/modules/local/faitochr/environment.yml new file mode 100644 index 00000000..54441c6f --- /dev/null +++ 
b/modules/local/faitochr/environment.yml @@ -0,0 +1,7 @@ +name: faitochr +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - anaconda::gawk=5.1.0 \ No newline at end of file diff --git a/modules/local/faitochr/main.nf b/modules/local/faitochr/main.nf index 7e2baa60..13a06217 100644 --- a/modules/local/faitochr/main.nf +++ b/modules/local/faitochr/main.nf @@ -2,6 +2,11 @@ process FAITOCHR { tag "$meta.id" label 'process_single' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'biocontainers/gawk:5.1.0' }" + input: tuple val(meta), path(fai) diff --git a/modules/local/vcfchrextract/environment.yml b/modules/local/vcfchrextract/environment.yml index e0abc8d2..3280dfaf 100644 --- a/modules/local/vcfchrextract/environment.yml +++ b/modules/local/vcfchrextract/environment.yml @@ -1,4 +1,4 @@ -name: bcftools_annotate +name: vcfchrextract channels: - conda-forge - bioconda From 6153a61915a70da1c1fd1444fbdb522ed3fed07d Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 27 Mar 2024 13:03:19 -0300 Subject: [PATCH 14/18] Update schema_input_panel.json --- assets/schema_input_panel.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/schema_input_panel.json b/assets/schema_input_panel.json index c1107f5e..242a4136 100644 --- a/assets/schema_input_panel.json +++ b/assets/schema_input_panel.json @@ -26,8 +26,8 @@ }, "index": { "type": "string", - "pattern": "^\\S+\\.(vcf|bcf)\\.(tbi|csi)$", - "errorMessage": "Panel index file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with '.csi' or '.tbi' extension" + "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?\\.(tbi|csi)$", + "errorMessage": "Panel index file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with optional 
'.gz' extension and with '.csi' or '.tbi' extension" } }, "required": ["panel", "chr", "vcf", "index"] From 212905fa39b8d2ec64cb8bb04e4fd49a6f0290d9 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 27 Mar 2024 17:22:39 +0100 Subject: [PATCH 15/18] Replace faitochr by gawk command --- assets/faitochr_awk.txt | 1 + modules.json | 5 ++ modules/local/faitochr/main.nf | 61 ----------------- modules/local/faitochr/tests/main.nf.test | 57 ---------------- .../local/faitochr/tests/main.nf.test.snap | 68 ------------------- modules/local/faitochr/tests/tags.yml | 2 - .../faitochr => nf-core/gawk}/environment.yml | 4 +- modules/nf-core/gawk/main.nf | 54 +++++++++++++++ modules/nf-core/gawk/meta.yml | 50 ++++++++++++++ .../local/vcf_chr_check/tests/main.nf.test | 2 +- .../vcf_chr_check/tests/main.nf.test.snap | 18 ++--- subworkflows/local/vcf_chr_rename/main.nf | 10 ++- .../local/vcf_chr_rename/tests/main.nf.test | 2 +- .../vcf_chr_rename/tests/main.nf.test.snap | 10 +-- 14 files changed, 135 insertions(+), 209 deletions(-) create mode 100644 assets/faitochr_awk.txt delete mode 100644 modules/local/faitochr/main.nf delete mode 100644 modules/local/faitochr/tests/main.nf.test delete mode 100644 modules/local/faitochr/tests/main.nf.test.snap delete mode 100644 modules/local/faitochr/tests/tags.yml rename modules/{local/faitochr => nf-core/gawk}/environment.yml (62%) create mode 100644 modules/nf-core/gawk/main.nf create mode 100644 modules/nf-core/gawk/meta.yml diff --git a/assets/faitochr_awk.txt b/assets/faitochr_awk.txt new file mode 100644 index 00000000..21385643 --- /dev/null +++ b/assets/faitochr_awk.txt @@ -0,0 +1 @@ +'BEGIN {FS="\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { if ($1 ~ /^[0-9]+|[XYMT]$/) print col1$1, col2$1; else print $1, $1 }' \ No newline at end of file diff --git a/modules.json b/modules.json index 0a99d93d..bfc15ccf 100644 --- a/modules.json +++ b/modules.json @@ -48,6 +48,11 @@ "git_sha": 
"de45447d060b8c8b98575bc637a4a575fd0638e1", "installed_by": ["modules"] }, + "gawk": { + "branch": "master", + "git_sha": "dc3527855e7358c6d8400828754c0caa5f11698f", + "installed_by": ["modules"] + }, "glimpse/chunk": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", diff --git a/modules/local/faitochr/main.nf b/modules/local/faitochr/main.nf deleted file mode 100644 index 13a06217..00000000 --- a/modules/local/faitochr/main.nf +++ /dev/null @@ -1,61 +0,0 @@ -process FAITOCHR { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'biocontainers/gawk:5.1.0' }" - - input: - tuple val(meta), path(fai) - - output: - tuple val(meta), path("*.txt"), emit: annot_chr - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - # Check if chr prefix is present in the chromosome names - col1="chr" - col2="" - if [ \$(awk 'NR==1 {print \$1}' ${fai} | grep -c '^chr') -eq 1 ]; then - col1="" - col2="chr" - fi - - # Take the fai file and add/remove the chr prefix to the chromosome names - # Keep only first column, remove chr prefix if present, add chr prefix if needed - # chr prefix is added only on number only chromosome names or XYMT - awk -F'\t' '{print \$1}' ${fai} | \ - sed 's/^chr//g' | \ - awk -v col1=\${col1} -v col2=\${col2} \ - 'BEGIN {OFS=" "} {if (\$1 ~ /^[0-9]+|[XYMT]\$/) print col1\$1, col2\$1; else print \$1, \$1}' \ - > ${prefix}.txt - - # We should have a file with the chromosome names in the second column corresponding to the fai format - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(awk --version | grep -o 'GNU Awk [0-9.]*' | cut -d ' ' -f 3) - END_VERSIONS - """ - - stub: - def args = 
task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(awk --version | grep -o 'GNU Awk [0-9.]*' | cut -d ' ' -f 3) - END_VERSIONS - """ -} diff --git a/modules/local/faitochr/tests/main.nf.test b/modules/local/faitochr/tests/main.nf.test deleted file mode 100644 index 1b066c5c..00000000 --- a/modules/local/faitochr/tests/main.nf.test +++ /dev/null @@ -1,57 +0,0 @@ -nextflow_process { - - name "Test Process FAITOCHR" - script "../main.nf" - process "FAITOCHR" - - tag "modules" - tag "modules_local" - tag "faitochr" - - test("fai add chr") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true), - true - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("fai remove chr") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true), - false - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/local/faitochr/tests/main.nf.test.snap b/modules/local/faitochr/tests/main.nf.test.snap deleted file mode 100644 index 3a5c5379..00000000 --- a/modules/local/faitochr/tests/main.nf.test.snap +++ /dev/null @@ -1,68 +0,0 @@ -{ - "fai add chr": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.txt:md5,dc360653d0d1848e6cc01661dbff389c" - ] - ], - "1": [ - "versions.yml:md5,0d85e18b9c36aa2db49ad51930d9a5e6" - ], - "annot_chr": [ - [ - { - "id": "test" - }, - "test.txt:md5,dc360653d0d1848e6cc01661dbff389c" - ] - ], - "versions": [ - 
"versions.yml:md5,0d85e18b9c36aa2db49ad51930d9a5e6" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-08T17:33:55.801913" - }, - "fai remove chr": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.txt:md5,c8aa996df2a03384617fff85d911f401" - ] - ], - "1": [ - "versions.yml:md5,0d85e18b9c36aa2db49ad51930d9a5e6" - ], - "annot_chr": [ - [ - { - "id": "test" - }, - "test.txt:md5,c8aa996df2a03384617fff85d911f401" - ] - ], - "versions": [ - "versions.yml:md5,0d85e18b9c36aa2db49ad51930d9a5e6" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-08T17:34:01.901705" - } -} diff --git a/modules/local/faitochr/tests/tags.yml b/modules/local/faitochr/tests/tags.yml deleted file mode 100644 index 5de9b9a1..00000000 --- a/modules/local/faitochr/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -faitochr: - - "modules/local/faitochr/**" diff --git a/modules/local/faitochr/environment.yml b/modules/nf-core/gawk/environment.yml similarity index 62% rename from modules/local/faitochr/environment.yml rename to modules/nf-core/gawk/environment.yml index 54441c6f..34513c7f 100644 --- a/modules/local/faitochr/environment.yml +++ b/modules/nf-core/gawk/environment.yml @@ -1,7 +1,7 @@ -name: faitochr +name: gawk channels: - conda-forge - bioconda - defaults dependencies: - - anaconda::gawk=5.1.0 \ No newline at end of file + - anaconda::gawk=5.1.0 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 00000000..f856a1f8 --- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,54 @@ +process GAWK { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(input) + path(program_file) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + + program = program_file ? "-f ${program_file}" : "${args2}" + + """ + awk \\ + ${args} \\ + ${program} \\ + ${input} \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 00000000..2b6033b0 --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,50 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk.
+keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on this file on the `ext.args2` or in the program file + pattern: "*" + - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't wish to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: The output file - specify the name of this file using `ext.prefix` and the extension using `ext.suffix` + pattern: "*" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/local/vcf_chr_check/tests/main.nf.test b/subworkflows/local/vcf_chr_check/tests/main.nf.test index 1375e9dc..da76fdc9 100644 --- a/subworkflows/local/vcf_chr_check/tests/main.nf.test +++ b/subworkflows/local/vcf_chr_check/tests/main.nf.test @@ -13,7 +13,7 @@ nextflow_workflow { tag "bcftools" tag "bcftools/annotate" tag "bcftools/index" - tag "faitochr" + tag "gawk" test("Rename: panel chr + fasta chr") { config "./nextflow_rename.config" diff --git a/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap b/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap index b7077fc4..10f7f443 100644 --- a/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_chr_check/tests/main.nf.test.snap @@ -20,7 +20,7 
@@ ], "1": [ "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", - "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1" + "versions.yml:md5,ad4c5338cd27e20789c70e28b8c74a42" ], "vcf": [ [ @@ -40,7 +40,7 @@ ], "versions": [ "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", - "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1" + "versions.yml:md5,ad4c5338cd27e20789c70e28b8c74a42" ] } ], @@ -48,7 +48,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-25T20:52:06.173493472" + "timestamp": "2024-03-27T17:21:13.588561053" }, "Rename: panel no chr + fasta chr": { "content": [ @@ -64,7 +64,7 @@ ], "1": [ "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", - "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1", + "versions.yml:md5,ad4c5338cd27e20789c70e28b8c74a42", "versions.yml:md5,e576f40503c3506c782228485d06fbf1" ], "vcf": [ @@ -78,7 +78,7 @@ ], "versions": [ "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", - "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1", + "versions.yml:md5,ad4c5338cd27e20789c70e28b8c74a42", "versions.yml:md5,e576f40503c3506c782228485d06fbf1" ] } @@ -87,7 +87,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-25T20:50:17.522495093" + "timestamp": "2024-03-27T17:21:39.92481538" }, "Rename: panel chr + fasta no chr": { "content": [ @@ -110,7 +110,7 @@ ], "1": [ "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", - "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1", + "versions.yml:md5,ad4c5338cd27e20789c70e28b8c74a42", "versions.yml:md5,e576f40503c3506c782228485d06fbf1" ], "vcf": [ @@ -131,7 +131,7 @@ ], "versions": [ "versions.yml:md5,395e1cde3f38a30f5d80769972ba23d8", - "versions.yml:md5,472924b76f1737f4b6c7708ddd4f88b1", + "versions.yml:md5,ad4c5338cd27e20789c70e28b8c74a42", "versions.yml:md5,e576f40503c3506c782228485d06fbf1" ] } @@ -140,6 +140,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-25T20:50:05.571457009" + "timestamp": "2024-03-27T17:21:28.214969089" } } \ 
No newline at end of file diff --git a/subworkflows/local/vcf_chr_rename/main.nf b/subworkflows/local/vcf_chr_rename/main.nf index 43b30cba..84a3b8b9 100644 --- a/subworkflows/local/vcf_chr_rename/main.nf +++ b/subworkflows/local/vcf_chr_rename/main.nf @@ -1,6 +1,6 @@ include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main.nf' include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' -include { FAITOCHR } from '../../../modules/local/faitochr/main.nf' +include { GAWK as FAITOCHR } from '../../../modules/nf-core/gawk/main.nf' workflow VCF_CHR_RENAME { take: @@ -12,14 +12,18 @@ workflow VCF_CHR_RENAME { ch_versions = Channel.empty() // Generate the chromosome renaming file - FAITOCHR(ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }) + FAITOCHR( + ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }, + Channel.of( + 'BEGIN {FS="\\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { sub(/^chr/, "", $1); if ($1 ~ /^([0-9]+|X|Y|M|MT)$/) print col1$1, col2$1; else print $1, $1 }' + ).collectFile(name:"program.txt")) ch_versions = ch_versions.mix(FAITOCHR.out.versions) // Rename the chromosome without prefix BCFTOOLS_ANNOTATE( ch_vcf // channel: [ [id], vcf, index ] .combine(Channel.of([[],[],[]])) - .combine(FAITOCHR.out.annot_chr.map{it[1]}) + .combine(FAITOCHR.out.output.map{it[1]}) ) ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first()) diff --git a/subworkflows/local/vcf_chr_rename/tests/main.nf.test b/subworkflows/local/vcf_chr_rename/tests/main.nf.test index 9ccd01b0..d8d9c4e4 100644 --- a/subworkflows/local/vcf_chr_rename/tests/main.nf.test +++ b/subworkflows/local/vcf_chr_rename/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_workflow { tag "bcftools" tag "bcftools/annotate" tag "bcftools/index" - tag "faitochr" + tag "gawk" test("Should run without error") { when { diff --git a/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap
b/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap index 60ce4bdb..52c5f8fe 100644 --- a/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_chr_rename/tests/main.nf.test.snap @@ -20,8 +20,8 @@ ], "1": [ "versions.yml:md5,176431a832f84d4c329f6d1e9c74d203", - "versions.yml:md5,3698013e288e15d392e1cd3e22d2022a", - "versions.yml:md5,924dd2e49d998f8f0da93799e62196f7" + "versions.yml:md5,260c4004a4bb0936c43f932e50de9c19", + "versions.yml:md5,3698013e288e15d392e1cd3e22d2022a" ], "vcf_renamed": [ [ @@ -41,8 +41,8 @@ ], "versions": [ "versions.yml:md5,176431a832f84d4c329f6d1e9c74d203", - "versions.yml:md5,3698013e288e15d392e1cd3e22d2022a", - "versions.yml:md5,924dd2e49d998f8f0da93799e62196f7" + "versions.yml:md5,260c4004a4bb0936c43f932e50de9c19", + "versions.yml:md5,3698013e288e15d392e1cd3e22d2022a" ] } ], @@ -50,6 +50,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-24T11:45:51.636035695" + "timestamp": "2024-03-27T17:18:53.771496074" } } \ No newline at end of file From 272b7592f55a93b349d8c9ab0dbcb3ac2337aaf9 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 27 Mar 2024 17:23:36 +0100 Subject: [PATCH 16/18] Remove unnecessary file --- assets/faitochr_awk.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 assets/faitochr_awk.txt diff --git a/assets/faitochr_awk.txt b/assets/faitochr_awk.txt deleted file mode 100644 index 21385643..00000000 --- a/assets/faitochr_awk.txt +++ /dev/null @@ -1 +0,0 @@ -'BEGIN {FS="\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { if ($1 ~ /^[0-9]+|[XYMT]$/) print col1$1, col2$1; else print $1, $1 }' \ No newline at end of file From 8b8dd922adee5e97d46df5688fe116a655f1fe89 Mon Sep 17 00:00:00 2001 From: Louis LE NEZET <58640615+LouisLeNezet@users.noreply.github.com> Date: Fri, 29 Mar 2024 09:46:03 +0100 Subject: [PATCH 17/18] Update nextflow_schema.json Co-authored-by: Anabella Trigila
<18577080+atrigila@users.noreply.github.com> --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index f3df5871..b1552439 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -88,7 +88,7 @@ }, "rename_chr": { "type": "boolean", - "description": "Should the vcf files be renamed to match the reference genome (e.g. 'chr1' -> '1')", + "description": "Should the panel vcf files be renamed to match the reference genome (e.g. 'chr1' -> '1')", "pattern": "true|false" }, "email": { From 27e10a26a2323f260bae1c2385446ffabfd3b71c Mon Sep 17 00:00:00 2001 From: Louis LE NEZET <58640615+LouisLeNezet@users.noreply.github.com> Date: Fri, 29 Mar 2024 09:46:14 +0100 Subject: [PATCH 18/18] Update CHANGELOG.md Co-authored-by: Anabella Trigila <18577080+atrigila@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c063019b..4d437c83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - correct meta map propagation - Test impute and test sim works - [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1/2 and shapeit5) -- [#20](https://github.com/nf-core/phaseimpute/pull/20) - Added automatic detection of vcf contigs and automatic renaming available +- [#20](https://github.com/nf-core/phaseimpute/pull/20) - Added automatic detection of vcf contigs for the reference panel and automatic renaming available ### `Fixed`