diff --git a/CHANGELOG.md b/CHANGELOG.md index 042b1474..bbc71090 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#148](https://github.com/nf-core/phaseimpute/pull/148) - Fix awsfulltest github action for manual dispatch - [#149](https://github.com/nf-core/phaseimpute/pull/149) - Remove the map file from the awsfulltest - [#152](https://github.com/nf-core/phaseimpute/pull/152) - Fix URLs in the documentation and remove tools citation in the README, use a white background for all images in the documentation. +- [#157](https://github.com/nf-core/phaseimpute/pull/157) - Add `chunk_model` as parameter for better control over `GLIMPSE2_CHUNK` and set window size in `GLIMPSE_CHUNK` (GLIMPSE1) and `GLIMPSE2_CHUNK` to 4 Mb to reduce the number of chunks (empirical). ### `Fixed` diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index d578817a..3ab60861 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -41,8 +41,8 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_NORM' { - ext.args = ["-m +any", "--no-version", "--output-type z", "--write-index=tbi"].join(' ') - ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } + ext.args = ["-m +any", "--no-version", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } publishDir = [ enabled: false ] } @@ -62,7 +62,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:VCFLIB_VCFFIXUP' { - ext.prefix = { "${meta.id}_${meta.chr}_fixup" } + ext.prefix = { "${meta.id}_${meta.chr}_fixup" } publishDir = [ path: { "${params.outdir}/prep_panel/panel" }, mode: params.publish_dir_mode, @@ -72,7 +72,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX' { - ext.args = "--tbi" + ext.args = "--tbi" publishDir = [ path: {
"${params.outdir}/prep_panel/panel" }, mode: params.publish_dir_mode, @@ -88,6 +88,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { + ext.args = "--window-mb 4" ext.prefix = { "${meta.id}_chunks" } } @@ -123,7 +124,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_CONVERT' { - ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} + ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} publishDir = [ path: { "${params.outdir}/prep_panel/haplegend/" }, mode: params.publish_dir_mode, @@ -184,6 +185,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = "--window-size 4000000" ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse1" } publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse1/" }, @@ -194,7 +196,8 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { - ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" } + ext.args = "--window-mb 4" + ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" } publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse2/" }, mode: params.publish_dir_mode, diff --git a/conf/test_all.config b/conf/test_all.config index 854f8664..c16d7904 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -33,6 +33,7 @@ params { phase = true normalize = true compute_freq = false + chunk_model = "recursive" // Pipeline steps steps = "all" diff --git a/conf/test_dog.config b/conf/test_dog.config index 33755994..6ebc015e 100644 --- a/conf/test_dog.config +++ b/conf/test_dog.config @@ -32,6 +32,7 @@ params { normalize = false compute_freq = false rename_chr = true + chunk_model = "recursive" // Input data input = params.pipelines_testdata_base_path + "dog_data/csv/sample_dog.csv" diff --git a/conf/test_panelprep.config b/conf/test_panelprep.config index 181d81de..f62f2189 100644 --- a/conf/test_panelprep.config +++
b/conf/test_panelprep.config @@ -32,6 +32,7 @@ params { normalize = true compute_freq = true remove_samples = "HG00096,HG00097,HG00099,HG00100" + chunk_model = "recursive" // Pipeline steps steps = "panelprep" diff --git a/main.nf b/main.nf index a5e3fa05..f5303872 100644 --- a/main.nf +++ b/main.nf @@ -43,6 +43,7 @@ workflow NFCORE_PHASEIMPUTE { ch_map // channel: map file for imputation ch_posfile // channel: samplesheet read in from --posfile ch_chunks // channel: samplesheet read in from --chunks + chunk_model // parameter: chunk model ch_versions // channel: versions of software used main: @@ -101,6 +102,7 @@ workflow NFCORE_PHASEIMPUTE { ch_map, ch_posfile, ch_chunks, + chunk_model, ch_versions ) emit: @@ -141,6 +143,7 @@ workflow { PIPELINE_INITIALISATION.out.gmap, PIPELINE_INITIALISATION.out.posfile, PIPELINE_INITIALISATION.out.chunks, + PIPELINE_INITIALISATION.out.chunk_model, PIPELINE_INITIALISATION.out.versions ) // diff --git a/nextflow.config b/nextflow.config index 14781b4a..3e402c96 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,7 +10,7 @@ params { // steps - steps = null + steps = null // Input options input = null @@ -24,6 +24,7 @@ params { normalize = true compute_freq = false remove_samples = null + chunk_model = 'sequential' // ChrCheck parameters rename_chr = false diff --git a/nextflow_schema.json b/nextflow_schema.json index c51892b8..3beb8382 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -132,6 +132,13 @@ "binaryref": { "type": "string", "description": "Whether to generate a binary reference file to be used with GLIMPSE2" + }, + "chunk_model": { + "type": "string", + "description": "Model type to use for GLIMPSE2_CHUNK", + "enum": ["recursive", "sequential"], + "default": "sequential", + "hidden": true } } }, diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 54fc07d1..21eab92c 100644 --- 
a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -279,6 +279,9 @@ workflow PIPELINE_INITIALISATION { // Check that all input files have the correct index checkFileIndex(ch_input.mix(ch_input_truth, ch_ref_gen, ch_panel)) + // Chunk model + chunk_model = params.chunk_model + emit: input = ch_input // [ [meta], file, index ] input_truth = ch_input_truth // [ [meta], file, index ] @@ -289,6 +292,7 @@ workflow PIPELINE_INITIALISATION { gmap = ch_map // [ [map], map ] posfile = ch_posfile // [ [panel, chr], vcf, index, hap, legend ] chunks = ch_chunks // [ [chr], txt ] + chunk_model = chunk_model versions = ch_versions } @@ -407,6 +411,9 @@ def validateInputParameters() { error("To use `--remove_samples` you need to include `--normalize`.") } } + + // Check that the chunk model is provided + assert params.chunk_model : "No chunk model provided" } // diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test index 238bc53c..2c7045c9 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test @@ -17,6 +17,7 @@ nextflow_workflow { chunks = "../../../tests/csv/chunks.csv" posfile = "../../../tests/csv/posfile.csv" panel = "../../../tests/csv/panel.csv" + chunk_model = "recursive" } workflow { """ @@ -49,6 +50,7 @@ nextflow_workflow { posfile = "../../../tests/csv/posfile.csv" panel = "../../../tests/csv/panel.csv" input_region = "$moduleTestDir/region.csv" + chunk_model = "sequential" } workflow { """ @@ -80,6 +82,7 @@ nextflow_workflow { chunks = "../../../tests/csv/chunks.csv" panel = "../../../tests/csv/panel.csv" input_region = "$moduleTestDir/region.csv" + chunk_model = "recursive" } workflow { """ diff --git a/subworkflows/local/vcf_chunk_glimpse/main.nf 
b/subworkflows/local/vcf_chunk_glimpse/main.nf index 457b0b1b..dad0dcf0 100644 --- a/subworkflows/local/vcf_chunk_glimpse/main.nf +++ b/subworkflows/local/vcf_chunk_glimpse/main.nf @@ -7,6 +7,7 @@ workflow VCF_CHUNK_GLIMPSE { take: ch_reference // channel: [ [panel, chr], vcf, csi ] ch_map // channel (optional): [ [chr], map ] + chunk_model // channel : model main: @@ -36,9 +37,6 @@ workflow VCF_CHUNK_GLIMPSE { ) .map { metaPC, it -> [metaPC, it["RegionIn"], it["RegionOut"]]} - // Make chunks with Glimpse2 (does not work with "sequential" mode) - chunk_model = "recursive" - ch_input_glimpse2 = ch_vcf_csi_chr .map{ metaPC, vcf, csi, chr -> [metaPC.subMap("chr"), metaPC, vcf, csi, chr] diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test index c222059e..9be51fbf 100644 --- a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test +++ b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test @@ -47,6 +47,7 @@ nextflow_workflow { file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_21.map", checkIfExist:true) ] ) + input[2] = "recursive" """ } } @@ -54,10 +55,12 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out).match() }, - { assert snapshot(workflow.out.chunks.collect{ - path(it[1]).readLines() - }).match("chunksWithMap") + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() } ) } @@ -87,6 +90,7 @@ nextflow_workflow { [[chr: "chr22"], []], [[chr: "chr21"], []] ) + input[2] = "recursive" """ } } @@ -94,10 +98,55 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out).match() }, - { assert snapshot(workflow.out.chunks.collect{ - path(it[1]).readLines() - }).match("chunksWithoutMap") + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() + } + ) + } + } + + 
test("Chunks with sequential model") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExists:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExists:true), + ] + ) + input[1] = Channel.of( + [[chr: "chr22"], []], + [[chr: "chr21"], []] + ) + input[2] = "sequential" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() } ) } diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap index c6a21882..90a62932 100644 --- a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap @@ -1,23 +1,4 @@ { - "chunksWithoutMap": { - "content": [ - [ - [ - "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", - "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" - ], - [ - "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", - "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-07-18T11:26:35.422657952" - }, "Chunks without Map": { "content": [ { @@ -289,13 +270,23 @@ "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", "versions.yml:md5,7d277747b107043dd31d3aef18045eef" ] - } + }, + [ + [ +
"0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", + "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" + ], + [ + "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", + "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" + ] + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-18T11:26:34.820936365" + "timestamp": "2024-11-10T18:36:03.086912114" }, "Chunks with Map": { "content": [ @@ -536,16 +527,296 @@ "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", "versions.yml:md5,7d277747b107043dd31d3aef18045eef" ] - } + }, + [ + [ + "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", + "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" + ], + [ + "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", + "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" + ] + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-18T11:26:24.155279577" + "timestamp": "2024-11-10T18:35:46.374947355" }, - "chunksWithMap": { + "Chunks with sequential model": { "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", 
+ "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "3": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:1-16609998", + "chr21:1-16590145" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-1248956422", + "chr21:16590146-1248956422" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:1-16609999", + "chr22:1-16590520" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-1248956422", + "chr22:16590521-1248956422" + ] + ], + "4": [ + [ + [ + + ] + ] + ], + "5": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ], + "binary": [ + [ + [ + + ] + ] + ], + "chunks": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "chunks_glimpse1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "chunks_glimpse2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:1-16609998", + "chr21:1-16590145" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-1248956422", + "chr21:16590146-1248956422" + ], + [ + { + 
"id": "1000GP", + "chr": "chr22" + }, + "chr22:1-16609999", + "chr22:1-16590520" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-1248956422", + "chr22:16590521-1248956422" + ] + ], + "chunks_quilt": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "versions": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ] + }, [ [ "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", @@ -558,9 +829,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-18T11:26:24.609554836" + "timestamp": "2024-11-10T18:36:16.974727308" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config b/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config index 3f3e8b9e..5a412491 100644 --- a/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config +++ b/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config @@ -1,4 +1,9 @@ process { + resourceLimits = [ + memory : "2.GB", + cpus : 2, + time : "1h" + ] withName: GLIMPSE2_CHUNK { ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') ext.prefix = { "${meta.id}_${meta.chr}" } diff --git a/subworkflows/local/vcf_phase_shapeit5/main.nf b/subworkflows/local/vcf_phase_shapeit5/main.nf index af643b14..7bf0e7d2 100644 --- a/subworkflows/local/vcf_phase_shapeit5/main.nf +++ b/subworkflows/local/vcf_phase_shapeit5/main.nf @@ -7,19 +7,17 @@ include { 
BCFTOOLS_INDEX as VCF_BCFTOOLS_INDEX_2 } from '../../../modules/nf-cor workflow VCF_PHASE_SHAPEIT5 { take: - ch_vcf // channel (mandatory): [ [id, chr], vcf, csi, pedigree ] + ch_vcf // channel (mandatory) : [ [id, chr], vcf, csi, pedigree ] ch_region // channel (mandatory) : [ [chr, region], region ] - ch_ref // channel (optional) : [ [id, chr], ref, csi ] - ch_scaffold // channel (optional) : [ [id, chr], scaffold, csi ] + ch_ref // channel (optional) : [ [id, chr], ref, csi ] + ch_scaffold // channel (optional) : [ [id, chr], scaffold, csi ] ch_map // channel (mandatory) : [ [chr], map] + chunk_model // channel (mandatory) : [ model ] main: ch_versions = Channel.empty() - // Make chunks with Glimpse2 (does not work with "sequential" mode) - chunk_model = "recursive" - // Chunk with Glimpse2 ch_input_glimpse2 = ch_vcf .map{ @@ -43,9 +41,6 @@ workflow VCF_PHASE_SHAPEIT5 { ) .map { metaIC, it -> [metaIC, it["RegionBuf"], it["RegionCnk"]]} - ch_chunks_number = GLIMPSE2_CHUNK.out.chunk_chr - .map { meta, chunk -> [meta.subMap("chr"), chunk.countLines().intValue()]} - ch_phase_input = ch_vcf .combine(ch_chunks_glimpse2, by:0) .map{ diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test index 4a95348f..a2f855ac 100644 --- a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test +++ b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test @@ -20,7 +20,7 @@ nextflow_workflow { tag "bcftools" tag "bcftools/index" - test("Phase vcf with regions, no map, no ref, no scaffold") { + test("Phase vcf with regions, no map, no ref, no scaffold, recursive model") { when { params { max_cpus = 2 @@ -52,6 +52,7 @@ nextflow_workflow { [[chr: "chr22"],[]], [[chr: "chr21"], []] ) + input[5] = "recursive" """ } } @@ -59,10 +60,64 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out).match() }, - { assert snapshot(workflow.out.vcf_tbi.collect{ - 
path(it[1]).vcf.summary - }).match("Phasing content") + { assert snapshot( + workflow.out, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + }).match() + }, + { workflow.out.vcf_tbi.collect{ + assert path(it[1]).vcf.phased + }} + ) + } + } + + test("Phase vcf with regions, no map, no ref, no scaffold, sequential model") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExists:true), + [] + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExists:true), + [] + ] + ) + input[1] = Channel.of( + [[chr: "chr22", region:"chr22:16570000-16610000"], "chr22:16570000-16610000"], + [[chr: "chr21", region:"chr21:16570000-16610000"], "chr21:16570000-16610000"] + ) + input[2] = Channel.of([[],[],[]]).collect() + input[3] = Channel.of([[],[],[]]).collect() + input[4] = Channel.of( + [[chr: "chr22"],[]], + [[chr: "chr21"], []] + ) + input[5] = "sequential" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + }).match() }, { workflow.out.vcf_tbi.collect{ assert path(it[1]).vcf.phased @@ -70,6 +125,7 @@ nextflow_workflow { ) } } + /* TODO: Fix this test with https://github.com/odelaneau/shapeit5/issues/96 test("Phase vcf with regions, with map, no ref, no scaffold") { when { @@ -103,6 +159,7 @@ nextflow_workflow { [ [chr: "chr22"], file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_22.map", checkIfExist:true)], [
[chr: "chr21"], file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_21.map", checkIfExist:true)] ) + input[5] = "recursive" """ } } diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap index ffa1c169..bb7e9f9d 100644 --- a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap @@ -1,18 +1,70 @@ { - "Phasing content": { + "Phase vcf with regions, no map, no ref, no scaffold, sequential model": { "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP.vcf.gz:md5,4029303e3c083ebb2522fb5c8dc4b63a", + "1000GP.vcf.gz.csi:md5,c57057d136f6c859aac4e6ae28ec793b" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP.vcf.gz:md5,23d09ba884eda7449702fece3f652d9d", + "1000GP.vcf.gz.csi:md5,281791c87517a6f3e83c3fd736ec704e" + ] + ], + "1": [ + "versions.yml:md5,529c03b8d921c72026e91d71c0321811", + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,8ffcda8a9d22f60c90d0e4276da3e714", + "versions.yml:md5,cab7592ebcb3d391afcd3191a175723b", + "versions.yml:md5,ed131d2608f28f8ada06ccc42717575e" + ], + "vcf_tbi": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP.vcf.gz:md5,4029303e3c083ebb2522fb5c8dc4b63a", + "1000GP.vcf.gz.csi:md5,c57057d136f6c859aac4e6ae28ec793b" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP.vcf.gz:md5,23d09ba884eda7449702fece3f652d9d", + "1000GP.vcf.gz.csi:md5,281791c87517a6f3e83c3fd736ec704e" + ] + ], + "versions": [ + "versions.yml:md5,529c03b8d921c72026e91d71c0321811", + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,8ffcda8a9d22f60c90d0e4276da3e714", + "versions.yml:md5,cab7592ebcb3d391afcd3191a175723b", + "versions.yml:md5,ed131d2608f28f8ada06ccc42717575e" + ] + }, [ "VcfFile [chromosomes=[chr21], sampleCount=3196, variantCount=836, phased=true, phasedAutodetect=true]", 
"VcfFile [chromosomes=[chr22], sampleCount=3196, variantCount=903, phased=true, phasedAutodetect=true]" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-18T15:56:20.067633938" + "timestamp": "2024-11-10T19:18:52.508986134" }, - "Phase vcf with regions, no map, no ref, no scaffold": { + "Phase vcf with regions, no map, no ref, no scaffold, recursive model": { "content": [ { "0": [ @@ -21,16 +73,16 @@ "id": "1000GP", "chr": "chr21" }, - "1000GP.vcf.gz:md5,a8af1c991fb1ca9e0919e0d5ddae8968", - "1000GP.vcf.gz.csi:md5,b4c653bd398bb838b496e022705bb562" + "1000GP.vcf.gz:md5,d454c821a71b7c569540e381068fbe03", + "1000GP.vcf.gz.csi:md5,13e80ee9a038a715efd6f97befec28c6" ], [ { "id": "1000GP", "chr": "chr22" }, - "1000GP.vcf.gz:md5,106157dc553c3c19eebdce0567e69d4f", - "1000GP.vcf.gz.csi:md5,58026cf302bd82382a078c5690429b5c" + "1000GP.vcf.gz:md5,68e488b81ea8ca52a52b20fec603bf3e", + "1000GP.vcf.gz.csi:md5,98037a25e4112f8a4df62eba96d08634" ] ], "1": [ @@ -46,16 +98,16 @@ "id": "1000GP", "chr": "chr21" }, - "1000GP.vcf.gz:md5,a8af1c991fb1ca9e0919e0d5ddae8968", - "1000GP.vcf.gz.csi:md5,b4c653bd398bb838b496e022705bb562" + "1000GP.vcf.gz:md5,d454c821a71b7c569540e381068fbe03", + "1000GP.vcf.gz.csi:md5,13e80ee9a038a715efd6f97befec28c6" ], [ { "id": "1000GP", "chr": "chr22" }, - "1000GP.vcf.gz:md5,106157dc553c3c19eebdce0567e69d4f", - "1000GP.vcf.gz.csi:md5,58026cf302bd82382a078c5690429b5c" + "1000GP.vcf.gz:md5,68e488b81ea8ca52a52b20fec603bf3e", + "1000GP.vcf.gz.csi:md5,98037a25e4112f8a4df62eba96d08634" ] ], "versions": [ @@ -65,12 +117,16 @@ "versions.yml:md5,cab7592ebcb3d391afcd3191a175723b", "versions.yml:md5,ed131d2608f28f8ada06ccc42717575e" ] - } + }, + [ + "VcfFile [chromosomes=[chr21], sampleCount=3196, variantCount=836, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr22], sampleCount=3196, variantCount=903, phased=true, phasedAutodetect=true]" + ] ], "meta": { - "nf-test": 
"0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-22T15:46:34.727932091" + "timestamp": "2024-11-10T19:18:20.407509527" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config b/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config index 8817d417..c0c00d6f 100644 --- a/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config +++ b/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config @@ -1,7 +1,7 @@ process { withName: GLIMPSE2_CHUNK { ext.prefix = { "${meta.id}_chunks" } - ext.args = ["--window-mb 0.1", "--window-cm 0.1", "--window-count 2000", "--buffer-mb 0.05", "--buffer-cm 0.05", "--buffer-count 300"].join(' ') + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') } withName: SHAPEIT5_PHASECOMMON { diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 5ae079dc..fe4c1fa9 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -91,6 +91,7 @@ workflow PHASEIMPUTE { ch_map // channel: genetic map [ [chr], map] ch_posfile // channel: posfile [ [id, chr], vcf, index, hap, legend] ch_chunks // channel: chunks [ [chr], txt] + chunk_model // parameter: chunk model ch_versions // channel: versions of software used main: @@ -171,14 +172,15 @@ workflow PHASEIMPUTE { ch_region, [[],[],[]], [[],[],[]], - ch_map + ch_map, + chunk_model ) ch_panel_phased = VCF_PHASE_SHAPEIT5.out.vcf_tbi ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) } // Create chunks from reference VCF - VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) + VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map, chunk_model) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) // Assign chunks channels