From 4aee93f6882402b640f8a4c927c37a628fcc3744 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Wed, 6 Nov 2024 12:56:07 +0000 Subject: [PATCH 1/4] except itruth everything works --- assets/datavzrd/sompy.datavzrd.template.yaml | 18 +- assets/samplesheet.csv | 6 - assets/samplesheet_full.csv | 5 - assets/samplesheet_full_small.csv | 3 + assets/samplesheet_full_sv.csv | 3 + assets/samplesheet_small.csv | 3 + assets/samplesheet_somatic.csv | 7 - assets/samplesheet_somatic_indel.csv | 3 + assets/samplesheet_somatic_snv.csv | 4 + assets/samplesheet_somatic_sv.csv | 3 + assets/samplesheet_sv.csv | 4 + assets/samplesheet_sv_hg37.csv | 3 + assets/schema_input.json | 11 +- conf/modules.config | 101 +++--- conf/test.config | 48 --- conf/test_full.config | 4 +- conf/test_liftover.config | 44 --- conf/test_somatic.config | 39 --- conf/test_stub.config | 2 +- conf/truth.config | 57 ++-- nextflow.config | 18 +- nextflow_schema.json | 102 ++---- .../local/compare_benchmark_results.nf | 77 +++-- subworkflows/local/liftover_vcfs_truth.nf | 4 +- subworkflows/local/prepare_vcfs_test.nf | 33 +- subworkflows/local/prepare_vcfs_truth.nf | 4 +- .../local/report_benchmark_statistics.nf | 2 +- subworkflows/local/report_vcf_statistics.nf | 40 ++- .../local/small_germline_benchmark.nf | 12 +- subworkflows/local/small_somatic_benchmark.nf | 4 +- subworkflows/local/sv_germline_benchmark.nf | 18 +- subworkflows/local/sv_vcf_conversion.nf | 9 +- .../main.nf | 26 +- subworkflows/local/vcf_variant_filtering.nf | 31 +- workflows/variantbenchmarking.nf | 322 ++++-------------- 35 files changed, 357 insertions(+), 713 deletions(-) delete mode 100644 assets/samplesheet.csv delete mode 100644 assets/samplesheet_full.csv create mode 100644 assets/samplesheet_full_small.csv create mode 100644 assets/samplesheet_full_sv.csv create mode 100644 assets/samplesheet_small.csv delete mode 100644 assets/samplesheet_somatic.csv create mode 100644 assets/samplesheet_somatic_indel.csv create 
mode 100644 assets/samplesheet_somatic_snv.csv create mode 100644 assets/samplesheet_somatic_sv.csv create mode 100644 assets/samplesheet_sv.csv create mode 100644 assets/samplesheet_sv_hg37.csv delete mode 100644 conf/test.config delete mode 100644 conf/test_liftover.config delete mode 100644 conf/test_somatic.config diff --git a/assets/datavzrd/sompy.datavzrd.template.yaml b/assets/datavzrd/sompy.datavzrd.template.yaml index ac3eaf6..dc5afea 100644 --- a/assets/datavzrd/sompy.datavzrd.template.yaml +++ b/assets/datavzrd/sompy.datavzrd.template.yaml @@ -9,10 +9,12 @@ views: columns: Tool: display-mode: normal - Threshold: + Type: display-mode: normal TP_base: display-mode: normal + TP: + display-mode: normal FN: display-mode: normal TP_call: @@ -23,5 +25,17 @@ views: display-mode: normal Recall: display-mode: normal - F1: + recall_lower: + display-mode: normal + recall_upper: + display-mode: normal + recall2: + display-mode: normal + precision_lower: + display-mode: normal + precision_upper: + display-mode: normal + fp.region.size: + display-mode: normal + fp.rate: display-mode: normal diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index af2c0cd..0000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,6 +0,0 @@ -id,test_vcf,caller,vartype,pctsize,pctseq,pctovl,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,evaluationmode -manta1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/manta.HG002.chr21.vcf.gz,manta,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts -merged1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/Ashkenazim_HG002.filtered.sv.chr21.vcf.gz,merged,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts -dragen1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/HG002_DRAGEN_SV_hg19.chr21.vcf.gz,dragen,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts 
-strelka1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek/hg38/HG002.strelka.variants.chr21.vcf.gz,strelka,small,,,,,,,,,,,, -bcftools1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek/hg38/HG002.bcftools.chr21.vcf.gz,bcftools,small,,,,,,,,,,,, diff --git a/assets/samplesheet_full.csv b/assets/samplesheet_full.csv deleted file mode 100644 index ca2a6b1..0000000 --- a/assets/samplesheet_full.csv +++ /dev/null @@ -1,5 +0,0 @@ -id,test_vcf,caller,vartype,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,dup_to_ins,pctsize,pctseq,pctovl,evaluationmode,subsample -test1,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/manta_GIABv0.6/diploidSV_PASS_DUPtoINS.vcf.gz",manta,sv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts, -test2,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/delly_GIABv0.6/norm_recall_HG002.merged__filt_DUPtoINS.vcf.gz",delly,sv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts, -test4,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/SevenBridges_GraphGATKRefine_05052017/HG002-NA24385-50x.union_170414.split.vcf.gz",graph,small,,,,,,,,,,,,, -test5,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/OsloUniversityHospital_Exome_GATK_jointVC_11242015/HG002-HG003-HG004.jointVC.filter.vcf",gatk,small,,,,,,,,,,,,,"Sample_Diag-excap51-HG002-EEogPU" diff --git a/assets/samplesheet_full_small.csv b/assets/samplesheet_full_small.csv new file mode 100644 index 0000000..d525fa4 --- /dev/null +++ b/assets/samplesheet_full_small.csv @@ -0,0 +1,3 @@ +id,test_vcf,caller,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,dup_to_ins,pctsize,pctseq,pctovl,evaluationmode,subsample 
+test1,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/SevenBridges_GraphGATKRefine_05052017/HG002-NA24385-50x.union_170414.split.vcf.gz",graph,,,,,,,,,,,,, +test2,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/OsloUniversityHospital_Exome_GATK_jointVC_11242015/HG002-HG003-HG004.jointVC.filter.vcf",gatk,,,,,,,,,,,,,"Sample_Diag-excap51-HG002-EEogPU" diff --git a/assets/samplesheet_full_sv.csv b/assets/samplesheet_full_sv.csv new file mode 100644 index 0000000..ca2bb05 --- /dev/null +++ b/assets/samplesheet_full_sv.csv @@ -0,0 +1,3 @@ +id,test_vcf,caller,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,dup_to_ins,pctsize,pctseq,pctovl,evaluationmode,subsample +test1,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/manta_GIABv0.6/diploidSV_PASS_DUPtoINS.vcf.gz",manta,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts, +test2,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/delly_GIABv0.6/norm_recall_HG002.merged__filt_DUPtoINS.vcf.gz",delly,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts, diff --git a/assets/samplesheet_small.csv b/assets/samplesheet_small.csv new file mode 100644 index 0000000..77934d2 --- /dev/null +++ b/assets/samplesheet_small.csv @@ -0,0 +1,3 @@ +id,test_vcf,caller +test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek/hg38/HG002.strelka.variants.chr21.vcf.gz,strelka +test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek/hg38/HG002.bcftools.chr21.vcf.gz,bcftools diff --git a/assets/samplesheet_somatic.csv b/assets/samplesheet_somatic.csv deleted file mode 100644 index 161565f..0000000 --- a/assets/samplesheet_somatic.csv +++ /dev/null @@ -1,7 +0,0 @@ 
-id,test_vcf,caller,vartype,subsample -freebayes,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.freebayes.chr21.vcf.gz,freebayes,small, -manta1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.manta.somatic_sv.vcf.gz,manta,snv,"HCC1395_HCC1395T" -strelka1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.strelka.somatic_indels.vcf.gz,strelka,indel,"TUMOR" -strelka2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.strelka.somatic_snvs.vcf.gz,strelka,snv,"TUMOR" -tiddit2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.tiddit_sv_merge.vcf.gz,tiddit,sv,"HCC1395_HCC1395T" -manta2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.manta.somatic_sv.vcf.gz,manta,sv,"HCC1395_HCC1395T" diff --git a/assets/samplesheet_somatic_indel.csv b/assets/samplesheet_somatic_indel.csv new file mode 100644 index 0000000..7792ae3 --- /dev/null +++ b/assets/samplesheet_somatic_indel.csv @@ -0,0 +1,3 @@ +id,test_vcf,caller,subsample +test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.freebayes.chr21.vcf.gz,freebayes, +test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.strelka.somatic_indels.vcf.gz,strelka,"TUMOR" diff --git a/assets/samplesheet_somatic_snv.csv b/assets/samplesheet_somatic_snv.csv new file mode 100644 index 0000000..07708b9 --- /dev/null +++ b/assets/samplesheet_somatic_snv.csv @@ -0,0 +1,4 @@ +id,test_vcf,caller,subsample 
+test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.freebayes.chr21.vcf.gz,freebayes, +test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.manta.somatic_sv.vcf.gz,manta,"HCC1395_HCC1395T" +test3,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.strelka.somatic_snvs.vcf.gz,strelka,"TUMOR" diff --git a/assets/samplesheet_somatic_sv.csv b/assets/samplesheet_somatic_sv.csv new file mode 100644 index 0000000..7aeae5a --- /dev/null +++ b/assets/samplesheet_somatic_sv.csv @@ -0,0 +1,3 @@ +id,test_vcf,caller,subsample +test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.tiddit_sv_merge.vcf.gz,tiddit,"HCC1395_HCC1395T" +test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.manta.somatic_sv.vcf.gz,manta,"HCC1395_HCC1395T" diff --git a/assets/samplesheet_sv.csv b/assets/samplesheet_sv.csv new file mode 100644 index 0000000..dd76036 --- /dev/null +++ b/assets/samplesheet_sv.csv @@ -0,0 +1,4 @@ +id,test_vcf,caller,pctsize,pctseq,pctovl,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,evaluationmode +test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/manta.HG002.chr21.vcf.gz,manta,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts +test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/Ashkenazim_HG002.filtered.sv.chr21.vcf.gz,merged,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts +test3,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/HG002_DRAGEN_SV_hg19.chr21.vcf.gz,dragen,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts diff --git a/assets/samplesheet_sv_hg37.csv 
b/assets/samplesheet_sv_hg37.csv new file mode 100644 index 0000000..2089199 --- /dev/null +++ b/assets/samplesheet_sv_hg37.csv @@ -0,0 +1,3 @@ +id,test_vcf,caller +test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_delly_SV_hg19.chr21.vcf.gz,delly +test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_manta_SV_hg19_genotype.chr21.vcf.gz,manta diff --git a/assets/schema_input.json b/assets/schema_input.json index fd84111..7967366 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -31,15 +31,6 @@ "minLength": 1, "errorMessage": "Variant caller has to be defined. Can also be unknown, undefined or merged" }, - "vartype": { - "type": "string", - "pattern": "^\\S+$", - "description": "Variant type to apply benchmarking", - "meta": ["vartype"], - "minLength": 1, - "errorMessage": "Variant type can be only one of these: small, sv, snv, indel and cnv", - "enum": ["small", "sv", "snv", "indel", "cnv"] - }, "subsample": { "type": "string", "pattern": "^\\S+$", @@ -165,6 +156,6 @@ "default": null } }, - "required": ["test_vcf", "caller", "vartype", "id"] + "required": ["test_vcf", "caller", "id"] } } diff --git a/conf/modules.config b/conf/modules.config index cbd9898..d5ff443 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,7 +20,7 @@ process { withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: { "${params.outdir}/${params.variant_type}/multiqc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -33,15 +33,15 @@ process { withName: "VARIANT_EXTRACTOR" { ext.prefix = { input.baseName - ".vcf" } publishDir = [ - path: {"${params.outdir}/${meta.id}/preprocess"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/preprocess"}, pattern: "*{.vcf.gz}", mode: params.publish_dir_mode ] } withName: SVYNC { - ext.prefix = {"${meta.id}.${meta.vartype}.${meta.caller}"} + ext.prefix = {"${meta.id}.${meta.caller}"} publishDir = [ - path: {"${params.outdir}/${meta.id}/preprocess"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/preprocess"}, pattern: "*{.vcf.gz,vcf.gz.tbi}", mode: params.publish_dir_mode ] @@ -54,7 +54,7 @@ process { ext.prefix = { vcf.baseName - ".vcf" + ".sort"} ext.args = {"--output-type z" } publishDir = [ - path: {"${params.outdir}/${meta.id}/preprocess"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/preprocess"}, pattern: "*{.vcf.gz}", mode: params.publish_dir_mode ] @@ -78,7 +78,7 @@ process { ext.prefix = { vcf.baseName - ".vcf" + ".snv" } ext.args = {"--output-type v --types snps" } publishDir = [ - path: {"${params.outdir}/${meta.id}/preprocess"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/preprocess"}, pattern: "*{.vcf}", mode: params.publish_dir_mode ] @@ -87,7 +87,7 @@ process { ext.prefix = { vcf.baseName - ".vcf" + ".indel" } ext.args = {"--output-type v --types indels" } publishDir = [ - path: {"${params.outdir}/${meta.id}/preprocess"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/preprocess"}, pattern: "*{.vcf}", mode: params.publish_dir_mode ] @@ -101,7 +101,7 @@ process { (params.exclude_expression != null )? 
"--exclude '$params.exclude_expression'" : "" ].join(' ').trim() } publishDir = [ - path: {"${params.outdir}/${meta.id}/preprocess/"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/preprocess/"}, pattern: "*{.vcf}", mode: params.publish_dir_mode ] @@ -109,7 +109,7 @@ process { withName: SURVIVOR_FILTER { ext.prefix = { vcf_file.baseName - ".vcf" + ".filter"} publishDir = [ - path: {"${params.outdir}/${meta.id}/preprocess"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/preprocess"}, pattern: "*{.vcf}", mode: params.publish_dir_mode ] @@ -135,30 +135,29 @@ process { } // Variant stats withName: SURVIVOR_STATS { - ext.prefix ={["${meta.id}.${meta.vartype}", + ext.prefix ={["${meta.id}", (meta.caller != null) ? ".${meta.caller}" : '' ].join('').trim() } publishDir = [ - path: {"${params.outdir}/${meta.id}/stats/survivor/"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/stats/survivor/"}, pattern: "*{.stats}", mode: params.publish_dir_mode ] } withName: BCFTOOLS_STATS { - ext.prefix ={["${meta.id}.${meta.vartype}", + ext.prefix ={["${meta.id}", (meta.caller != null) ? 
".${meta.caller}" : '' ].join('').trim() } publishDir = [ - path: {"${params.outdir}/${meta.id}/stats/bcftools/"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/stats/bcftools/"}, pattern: "*{stats.txt}", mode: params.publish_dir_mode ] } // benchmark tools withName: "RTGTOOLS_FORMAT" { - ext.when = { params.method.split(',').contains('rtgtools') && !params.sdf } publishDir = [ path: {"${params.outdir}/references/rtgtools"}, pattern: "*{.sdf}", @@ -166,44 +165,43 @@ process { ] } withName: "RTGTOOLS_VCFEVAL" { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}.${meta.caller}"} + ext.prefix = {"${meta.id}.${params.truth_id}.${meta.caller}"} //ext.args = {"--ref-overlap --all-record --output-mode ga4gh"} - ext.when = { params.method.split(',').contains('rtgtools') } publishDir = [ - path: {"${params.outdir}/${meta.id}/rtgtools_bench"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/rtgtools"}, pattern: "*{.vcf.gz,vcf.gz.tbi,tsv.gz,txt}", mode: params.publish_dir_mode ] } withName: "HAPPY_HAPPY" { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}.${meta.caller}"} + ext.prefix = {"${meta.id}.${params.truth_id}.${meta.caller}"} //ext.args = {""} publishDir = [ - path: {"${params.outdir}/${meta.id}/happy_bench"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/happy"}, pattern: "*{.csv.gz,csv,json.gz,json,vcf.gz,vcf.gz.tbi}", mode: params.publish_dir_mode ] } withName: "HAPPY_SOMPY" { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}.${meta.caller}"} - ext.args = { meta.caller.contains("strelka") || meta.caller.contains("varscan") || meta.caller.contains("pisces") ? "--feature-table hcc.${meta.caller}.${meta.vartype} --bin-afs" : "--feature-table generic" } + ext.prefix = {"${meta.id}.${params.truth_id}.${meta.caller}"} + ext.args = { meta.caller.contains("strelka") || meta.caller.contains("varscan") || meta.caller.contains("pisces") ? 
"--feature-table hcc.${meta.caller}.${params.variant_type} --bin-afs" : "--feature-table generic" } publishDir = [ - path: {"${params.outdir}/${meta.id}/sompy_bench"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/sompy"}, pattern: "*{.csv.gz,csv,json.gz,json,vcf.gz,vcf.gz.tbi,csv}", mode: params.publish_dir_mode ] } withName: "HAPPY_PREPY" { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}.${meta.caller}"} + ext.prefix = {"${meta.id}.${params.truth_id}.${meta.caller}"} ext.args = {"--fixchr --filter-nonref --bcftools-norm"} publishDir = [ - path: {"${params.outdir}/${meta.id}/preprocess"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/preprocess"}, pattern: "*{vcf.gz}", mode: params.publish_dir_mode ] } withName: "TRUVARI_BENCH" { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}.${meta.caller}"} + ext.prefix = {"${meta.id}.${params.truth_id}.${meta.caller}"} ext.args = {[ "--sizemin 0 --sizefilt 0 --sizemax 5000000", (meta.pctseq != null) ? "--pctsize ${meta.pctsize}" : '', @@ -214,30 +212,28 @@ process { (meta.dup_to_ins) ? "--dup-to-ins" : '', (meta.typeignore) ? "--typeignore" : '' ].join(' ').trim()} - ext.when = { params.method.split(',').contains('truvari') } publishDir = [ - path: {"${params.outdir}/${meta.id}/truvari_bench"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/truvari"}, pattern: "*{.vcf.gz,vcf.gz.tbi,json}", mode: params.publish_dir_mode ] } withName: SVANALYZER_SVBENCHMARK { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}.${meta.caller}"} + ext.prefix = {"${meta.id}.${params.truth_id}.${meta.caller}"} ext.args = {[ (meta.normshift != null) ? "-normshift ${meta.normshift}" : '', (meta.normdist != null) ? "-normdist ${meta.normdist}" : '', (meta.normsizediff != null) ? "-normsizediff ${meta.normsizediff}" : '', (meta.maxdist != null) ? 
"-maxdist ${meta.maxdist}" : '' ].join(' ').trim()} - ext.when = { params.method.split(',').contains('svanalyzer') } publishDir = [ - path: {"${params.outdir}/${meta.id}/svanalyzer_bench"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/svanalyzer"}, pattern: "*{.vcf.gz,tbi,distances,log,report}", mode: params.publish_dir_mode ] } withName: WITTYER { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}.${meta.caller}"} + ext.prefix = {"${meta.id}.${params.truth_id}.${meta.caller}"} ext.args = {[ "--includedFilters=''", (meta.evaluationmode != null) ? "-em ${meta.evaluationmode}" : '', @@ -247,32 +243,32 @@ process { (meta.maxMatches != null) ? "--mm ${meta.maxMatches}" : '' ].join(' ').trim()} publishDir = [ - path: {"${params.outdir}/${meta.id}/wittyer_bench"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/wittyer"}, pattern: "*{.vcf.gz,tbi,json}", mode: params.publish_dir_mode ] } withName: BAMSURGEON_EVALUATOR { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} + ext.prefix = {"${meta.id}.${params.truth_id}.${meta.caller}"} publishDir = [ - path: {"${params.outdir}/${meta.id}/bamsurgeon_evalator"}, + path: {"${params.outdir}/${params.variant_type}/${meta.id}/benchmarks/bamsurgeon"}, pattern: "*{.vcf,stats}", mode: params.publish_dir_mode ] } // summary reports withName: MERGE_REPORTS { - ext.prefix = {"${meta.benchmark_tool}.${meta.vartype}"} + ext.prefix = {"${meta.benchmark_tool}"} publishDir = [ - path: {"${params.outdir}/summary/tables/${meta.vartype}/${meta.benchmark_tool}"}, + path: {"${params.outdir}/${params.variant_type}/summary/tables/${meta.benchmark_tool}"}, pattern: "*{csv}", mode: params.publish_dir_mode ] } withName: PLOTS { - ext.prefix = {"${meta.benchmark_tool}.${meta.vartype}"} + ext.prefix = {"${meta.benchmark_tool}"} publishDir = [ - path: {"${params.outdir}/summary/plots/${meta.vartype}/${meta.benchmark_tool}"}, + path: 
{"${params.outdir}/${params.variant_type}/summary/plots/${meta.benchmark_tool}"}, pattern: "*{png}", mode: params.publish_dir_mode ] @@ -280,7 +276,7 @@ process { withName: DATAVZRD { ext.prefix = {"${meta.id}"} publishDir = [ - path: {"${params.outdir}/summary/datavzrd/${meta.vartype}"}, + path: {"${params.outdir}/${params.variant_type}/summary/datavzrd/"}, pattern: "*", mode: params.publish_dir_mode ] @@ -290,29 +286,28 @@ process { ext.prefix = {input.toString() - ".vcf.gz"} } withName: SURVIVOR_MERGE { - ext.prefix = {"${meta.id}.${meta.vartype}.${meta.tag}"} + ext.prefix = {"${meta.id}.${meta.tag}"} } withName: BCFTOOLS_MERGE { - ext.prefix = {"${meta.id}.${meta.vartype}.${meta.tag}"} + ext.prefix = {"${meta.id}.${meta.tag}"} ext.args = {"--output-type v --force-samples --force-single"} } withName: VCF_TO_CSV { - ext.prefix = {"${meta.id}.${meta.vartype}.${meta.tag}"} + ext.prefix = {"${meta.id}.${meta.tag}"} publishDir = [ - path: {"${params.outdir}/summary/comparisons/${meta.vartype}"}, + path: {"${params.outdir}/${params.variant_type}/summary/comparisons/${meta.vartype}"}, pattern: "*{.csv}", mode: params.publish_dir_mode ] } withName: REFORMAT_HEADER { - ext.prefix ={["${meta.id}.${meta.vartype}", + ext.prefix ={["${meta.id}", (meta.tag != null) ? 
".${meta.tag}" : '' ].join('').trim() } } // liftOver withName: PICARD_CREATESEQUENCEDICTIONARY { - ext.when = { params.liftover } publishDir = [ path: {"${params.outdir}/references/dictionary"}, pattern: "*{.dict}", @@ -320,32 +315,28 @@ process { ] } withName: PICARD_LIFTOVERVCF { - ext.prefix = {"${meta.id}.${meta.vartype}"} + ext.prefix = {"${meta.id}"} ext.args = {"--WARN_ON_MISSING_CONTIG true"} - ext.when = { params.liftover } } withName: BCFTOOLS_RENAME_CHR { - ext.prefix = {"${meta.id}.${meta.vartype}.renamechr"} + ext.prefix = {"${meta.id}.renamechr"} ext.args = {"--output-type z"} - ext.when = { params.liftover } publishDir = [ - path: {"${params.outdir}/${params.sample}/liftover"}, + path: {"${params.outdir}/${params.truth_id}/liftover"}, pattern: "*{.vcf.gz}", mode: params.publish_dir_mode ] } withName: UCSC_LIFTOVER { - ext.prefix = {"${meta.id}.${meta.vartype}.liftover"} - ext.when = { params.liftover } + ext.prefix = {"${meta.id}.liftover"} } withName: SORT_BED { - ext.prefix = {"${meta.id}.${meta.vartype}.sort"} - ext.when = { params.liftover } + ext.prefix = {"${meta.id}.sort"} } withName: BEDTOOLS_MERGE { ext.prefix = {bed.toString() - ".bed" + ".merged" } publishDir = [ - path: {"${params.outdir}/${params.sample}/liftover"}, + path: {"${params.outdir}/${params.variant_type}/${params.truth_id}/liftover"}, pattern: "*{.bed}", mode: params.publish_dir_mode ] diff --git a/conf/test.config b/conf/test.config deleted file mode 100644 index c6518d2..0000000 --- a/conf/test.config +++ /dev/null @@ -1,48 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/variantbenchmarking -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data - sample = "HG002" - input = "assets/samplesheet.csv" - outdir = "results" - - // Genome references - genome = 'GRCh38' - - // Processes - analysis = 'germline' - method = 'happy,truvari,svanalyzer,wittyer,rtgtools' // - preprocess = "normalization,deduplication,prepy" - sv_standardization = "svync,homogenize" - - include_expression = 'FILTER="."' - min_sv_size = 30 - - truth_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/CRMG_benchmark_dataset/HG002_GRCh38_CMRG_smallvar_v1.00.vcf.gz" - high_conf_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/CRMG_benchmark_dataset/HG002_GRCh38_CMRG_smallvar_v1.00.bed" - truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz" - high_conf_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed" - - itruth_ignore = true -} diff --git a/conf/test_full.config b/conf/test_full.config index 71ad921..dd92cdd 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -20,13 +20,13 @@ params { max_time = '8.h' // Input data - input = 'assets/samplesheet_full.csv' + input = null outdir = 'results' // Genome references genome = 'GRCh37' analysis = 'germline' - sample = "HG002" + truth_id = "HG002" method = 'truvari,svanalyzer,happy,rtgtools' diff --git a/conf/test_liftover.config b/conf/test_liftover.config deleted file mode 100644 index 
db3dee4..0000000 --- a/conf/test_liftover.config +++ /dev/null @@ -1,44 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/benchmark -profile test_liftover, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.5GB' - max_time = '8.h' - - // Input data - input = 'assets/samplesheet.csv' - outdir = 'results' - genome = 'GRCh38' - - // Processes - analysis = 'germline' - sample = "HG002" - method = 'truvari,svanalyzer,rtgtools,happy' - preprocess = "normalization,deduplication,filter_contigs" - min_sv_size = 30 - - truth_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.chr21.vcf.gz" - high_conf_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.bed" - truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.vcf.gz" - high_conf_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.bed" - itruth_ignore = true - - //liftover files - chain = "https://raw.githubusercontent.com/broadinstitute/gatk/master/scripts/funcotator/data_sources/gnomAD/b37ToHg38.over.chain" - rename_chr = "assets/rename_contigs/grch37_grch38.txt" - liftover = true -} diff --git a/conf/test_somatic.config 
b/conf/test_somatic.config deleted file mode 100644 index 345b68e..0000000 --- a/conf/test_somatic.config +++ /dev/null @@ -1,39 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/variantbenchmarking -profile test_somatic, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.5GB' - max_time = '8.h' - - // Input data - input = 'assets/samplesheet_somatic.csv' - outdir = 'results' - - // Genome references - genome = 'GRCh38' - analysis = 'somatic' - sample = "SEQC2" - method = 'sompy,truvari' - preprocess = "filter_contigs" - - truth_snv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/truth/hg38/sSNV_truth_set_v1.0.chr21.vcf.gz" - high_conf_snv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/truth/hg38/high-confidence_sSNV_in_HC_regions_v1.2.chr21.vcf.gz" - truth_indel = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/truth/hg38/sINDEL_truth_set_v1.0.chr21.vcf.gz" - high_conf_indel = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/truth/hg38/high-confidence_sINDEL_in_HC_regions_v1.2.chr21.vcf.gz" - truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/truth/hg38/13059_2022_2816_MOESM4_ESM.vcf.gz" - itruth_ignore = true -} diff --git a/conf/test_stub.config b/conf/test_stub.config index fdbf8b8..8ecbb74 100644 --- 
a/conf/test_stub.config +++ b/conf/test_stub.config @@ -21,7 +21,7 @@ params { // Input data sample = "HG002" - input = "assets/samplesheet_somatic.csv" + input = "assets/samplesheet_sv.csv" outdir = "results" // Genome references diff --git a/conf/truth.config b/conf/truth.config index cda4ac1..6ed1b21 100644 --- a/conf/truth.config +++ b/conf/truth.config @@ -12,35 +12,52 @@ params { // Base directory for truth data germline { 'GRCh38' { - 'HG002'{ - truth_small = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz" - high_conf_small = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed" - truth_sv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/CMRG_v1.00/GRCh38/StructuralVariant/HG002_GRCh38_CMRG_SV_v1.00.vcf.gz" - high_conf_sv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/CMRG_v1.00/GRCh38/StructuralVariant/HG002_GRCh38_CMRG_SV_v1.00.bed" - chain = "http://ftp.ensembl.org/pub/assembly_mapping/homo_sapiens/GRCh38_to_GRCh37.chain.gz" - rename_chr = "assets/rename_contigs/grch38_grch37.txt" + "small" { + 'HG002'{ + truth_vcf = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz" + regions_bed = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed" + } } + "structural" { + 'HG002'{ + truth_vcf = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/CMRG_v1.00/GRCh38/StructuralVariant/HG002_GRCh38_CMRG_SV_v1.00.vcf.gz" + regions_bed = 
"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/CMRG_v1.00/GRCh38/StructuralVariant/HG002_GRCh38_CMRG_SV_v1.00.bed" + } + } + } 'GRCh37'{ - 'HG002'{ - truth_small = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh37/SupplementaryFiles/HG002_GRCh37_1_22_v4.2.1_highconf.vcf.gz" - high_conf_small = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh37/SupplementaryFiles/HG002_GRCh37_1_22_v4.2.1_highconf.bed" - truth_sv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz" - high_conf_sv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/HG002_SVs_Tier1_v0.6.bed" - chain = "https://raw.githubusercontent.com/broadinstitute/gatk/master/scripts/funcotator/data_sources/gnomAD/b37ToHg38.over.chain" - rename_chr = "assets/rename_contigs/grch37_grch38.txt" + "small" { + 'HG002'{ + truth_vcf = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh37/SupplementaryFiles/HG002_GRCh37_1_22_v4.2.1_highconf.vcf.gz" + regions_bed = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh37/SupplementaryFiles/HG002_GRCh37_1_22_v4.2.1_highconf.bed" + } + + } + "structural" { + 'HG002'{ + truth_vcf = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz" + regions_bed = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/HG002_SVs_Tier1_v0.6.bed" + } } + } } somatic{ 'GRCh38'{ - 'SEQC2'{ - truth_snv = 
"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/high-confidence_sSNV_in_HC_regions_v1.2.1.vcf.gz" - high_conf_snv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/High-Confidence_Regions_v1.2.bed" - truth_indel = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/high-confidence_sINDEL_in_HC_regions_v1.2.1.vcf.gz" - high_conf_indel = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/High-Confidence_Regions_v1.2.bed" - + "snv"{ + 'SEQC2'{ + truth_vcf = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/high-confidence_sSNV_in_HC_regions_v1.2.1.vcf.gz" + regions_bed = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/High-Confidence_Regions_v1.2.bed" + } } + "indel" { + 'SEQC2'{ + truth_indel = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/high-confidence_sINDEL_in_HC_regions_v1.2.1.vcf.gz" + high_conf_indel = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/High-Confidence_Regions_v1.2.bed" + } + } + } } } diff --git a/nextflow.config b/nextflow.config index 8736a95..f8484a8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,22 +9,23 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null outdir = "results" - sample = null + + // Option to select analysis type: germline or somatic analysis = null - sdf = null - dictionary = null - liftover = false + + // Option to select variant type to analysis: small, snv, indel, somatic, copynumber + variant_type = null + + // Truth parameters + truth_id = null // Benchmarking method method = 'truvari,svanalyzer,happy,rtgtools,wittyer' // Preprocess spesific parameters - // normalization 
includes braking down multiallelic samples -m any - // deduplication removes one of the variants in the same position preprocess = "" sv_standardization = "" @@ -41,6 +42,9 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false + sdf = null + dictionary = null + liftover = false // Truth library itruth_ignore = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 51ed763..8ff5b2b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["input", "outdir", "truth_id", "analysis", "variant_type", "method", "truth_vcf"], "properties": { "input": { "type": "string", @@ -29,9 +29,9 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, - "sample": { + "truth_id": { "type": "string", - "description": "Sample id", + "description": "Truth id, sample name to define truth vcf", "fa_icon": "fas fa-folder-open", "errorMessage": "The sample name of the truth case. Examples: HG002, SEQC2, HG001, HG003, CHM13" }, @@ -39,6 +39,16 @@ "type": "string", "description": "The analysis type used by the input files", "enum": ["germline", "somatic"], + "pattern": "(germline|somatic)", + "fa_icon": "fas fa-folder-open", + "errorMessage": "Analysis type has to be chosen: germline or somatic" + }, + "variant_type": { + "type": "string", + "description": "Variant types to benchmark", + "errorMessage": "Select a variant type to make the analysis: small,snv,indel,structural or copynumber.
Select small when your vcf contains both snvs and indels", + "enum": ["small", "snv", "indel", "structural", "copynumber"], + "pattern": "(small|snv|indel|structural|copynumber)", "fa_icon": "fas fa-folder-open" }, "method": { @@ -48,96 +58,24 @@ "pattern": "^((truvari|svanalyzer|happy|sompy|rtgtools|wittyer)?,?)*(? tuple([id: params.truth_id], file)}, chain.map{meta, file -> file} ) versions = versions.mix(UCSC_LIFTOVER.out.versions.first()) diff --git a/subworkflows/local/prepare_vcfs_test.nf b/subworkflows/local/prepare_vcfs_test.nf index 45f3be1..e1a6f3f 100644 --- a/subworkflows/local/prepare_vcfs_test.nf +++ b/subworkflows/local/prepare_vcfs_test.nf @@ -88,22 +88,25 @@ workflow PREPARE_VCFS_TEST { if (params.analysis.contains("somatic")){ // somatic spesific preperations - vcf_ch.branch{ - def meta = it[0] - small: meta.vartype == "small" - other: true - } - .set{vcf} + //vcf_ch.branch{ + // def meta = it[0] + // small: meta.vartype == "small" + // other: true + // } + // .set{vcf} + + if (params.variant_type == "small"){ + out_vcf_ch = Channel.empty() + + SPLIT_SMALL_VARIANTS_TEST( + vcf.small + ) + versions = versions.mix(SPLIT_SMALL_VARIANTS_TEST.out.versions.first()) + out_vcf_ch = out_vcf_ch.mix(SPLIT_SMALL_VARIANTS_TEST.out.out_vcf_ch, + vcf.other) + vcf_ch = out_vcf_ch + } - out_vcf_ch = Channel.empty() - - SPLIT_SMALL_VARIANTS_TEST( - vcf.small - ) - versions = versions.mix(SPLIT_SMALL_VARIANTS_TEST.out.versions.first()) - out_vcf_ch = out_vcf_ch.mix(SPLIT_SMALL_VARIANTS_TEST.out.out_vcf_ch, - vcf.other) - vcf_ch = out_vcf_ch } emit: diff --git a/subworkflows/local/prepare_vcfs_truth.nf b/subworkflows/local/prepare_vcfs_truth.nf index 6278699..4db015f 100644 --- a/subworkflows/local/prepare_vcfs_truth.nf +++ b/subworkflows/local/prepare_vcfs_truth.nf @@ -13,7 +13,7 @@ include { LIFTOVER_VCFS_TRUTH } from '../local/liftover_vcfs_truth' workflow PREPARE_VCFS_TRUTH { take: truth_ch // channel: [val(meta), vcf] - high_conf_ch // channel: [val(meta), bed] 
+ high_conf_ch // channel: [bed] fasta // reference channel [val(meta), ref.fa] fai // reference channel [val(meta), ref.fa.fai] chain // reference channel [val(meta), chain.gz] @@ -37,7 +37,7 @@ workflow PREPARE_VCFS_TRUTH { ) versions = versions.mix(LIFTOVER_VCFS_TRUTH.out.versions.first()) truth_ch = LIFTOVER_VCFS_TRUTH.out.vcf_ch - high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch + high_conf_ch = LIFTOVER_VCFS_TRUTH.out.bed_ch.map{ meta, bed -> [bed]} } // Reheader sample name for truth file - using meta.caller diff --git a/subworkflows/local/report_benchmark_statistics.nf b/subworkflows/local/report_benchmark_statistics.nf index 8ed96dd..a58afed 100644 --- a/subworkflows/local/report_benchmark_statistics.nf +++ b/subworkflows/local/report_benchmark_statistics.nf @@ -27,7 +27,7 @@ workflow REPORT_BENCHMARK_STATISTICS { versions = versions.mix(PLOTS.out.versions.first()) MERGE_REPORTS.out.summary - .map { meta, file -> tuple([vartype: meta.vartype] + [id: meta.benchmark_tool], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [id: meta.benchmark_tool], file) } .set { summary } // add path to csv file to the datavzrd input diff --git a/subworkflows/local/report_vcf_statistics.nf b/subworkflows/local/report_vcf_statistics.nf index 3a3423c..b6c283a 100644 --- a/subworkflows/local/report_vcf_statistics.nf +++ b/subworkflows/local/report_vcf_statistics.nf @@ -12,25 +12,29 @@ workflow REPORT_VCF_STATISTICS { main: versions = Channel.empty() + survivor_stats = Channel.empty() + + //input_ch.branch{ + // def meta = it[0] + // sv: meta.vartype == "sv" || meta.vartype == "cnv" + // other: true + // } + // .set{input} + + if (params.variant_type == "structural"){ + // use survivor stats to get SV statistics by TYPE + SURVIVOR_STATS( + input_ch.map{ meta, vcf, tbi -> + [ meta, vcf ] + }, + -1, + -1, + -1 + ) + survivor_stats = SURVIVOR_STATS.out.stats + versions = versions.mix(SURVIVOR_STATS.out.versions.first()) + } - input_ch.branch{ - def meta = it[0] - 
sv: meta.vartype == "sv" || meta.vartype == "cnv" - other: true - } - .set{input} - - // use survivor stats to get SV statistics by TYPE - SURVIVOR_STATS( - input.sv.map{ meta, vcf, tbi -> - [ meta, vcf ] - }, - -1, - -1, - -1 - ) - survivor_stats = SURVIVOR_STATS.out.stats - versions = versions.mix(SURVIVOR_STATS.out.versions.first()) // use bcftools stats for all files BCFTOOLS_STATS( diff --git a/subworkflows/local/small_germline_benchmark.nf b/subworkflows/local/small_germline_benchmark.nf index 7bfd346..a74ee9c 100644 --- a/subworkflows/local/small_germline_benchmark.nf +++ b/subworkflows/local/small_germline_benchmark.nf @@ -48,7 +48,7 @@ workflow SMALL_GERMLINE_BENCHMARK { // collect summary reports RTGTOOLS_VCFEVAL.out.summary - .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "rtgtools"], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [benchmark_tool: "rtgtools"], file) } .groupTuple() .set{ report } @@ -62,7 +62,7 @@ workflow SMALL_GERMLINE_BENCHMARK { versions = versions.mix(VCF_REHEADER_SAMPLENAME_1.out.versions) VCF_REHEADER_SAMPLENAME_1.out.ch_vcf - .map { meta, file, index -> tuple([vartype: meta.vartype] + [tag: "FN"] + [id: "rtgtools"], file, index) } + .map { meta, file, index -> tuple([vartype: params.variant_type] + [tag: "FN"] + [id: "rtgtools"], file, index) } .set { vcf_fn } VCF_REHEADER_SAMPLENAME_2( @@ -72,7 +72,7 @@ workflow SMALL_GERMLINE_BENCHMARK { versions = versions.mix(VCF_REHEADER_SAMPLENAME_2.out.versions) VCF_REHEADER_SAMPLENAME_2.out.ch_vcf - .map { meta, file, index -> tuple([vartype: meta.vartype] + [tag: "FP"] + [id: "rtgtools"], file, index) } + .map { meta, file, index -> tuple([vartype: params.variant_type] + [tag: "FP"] + [id: "rtgtools"], file, index) } .set { vcf_fp } VCF_REHEADER_SAMPLENAME_3( @@ -82,7 +82,7 @@ workflow SMALL_GERMLINE_BENCHMARK { versions = versions.mix(VCF_REHEADER_SAMPLENAME_3.out.versions) VCF_REHEADER_SAMPLENAME_3.out.ch_vcf - .map { meta, file, index -> 
tuple([vartype: meta.vartype] + [tag: "TP_base"] + [id: "rtgtools"], file, index) } + .map { meta, file, index -> tuple([vartype: params.variant_type] + [tag: "TP_base"] + [id: "rtgtools"], file, index) } .set { vcf_tp_base } VCF_REHEADER_SAMPLENAME_4( @@ -92,7 +92,7 @@ workflow SMALL_GERMLINE_BENCHMARK { versions = versions.mix(VCF_REHEADER_SAMPLENAME_4.out.versions) VCF_REHEADER_SAMPLENAME_4.out.ch_vcf - .map { meta, file, index -> tuple([vartype: meta.vartype] + [tag: "TP_comp"] + [id: "rtgtools"], file, index) } + .map { meta, file, index -> tuple([vartype: params.variant_type] + [tag: "TP_comp"] + [id: "rtgtools"], file, index) } .set { vcf_tp_comp } tagged_variants = tagged_variants.mix( @@ -146,7 +146,7 @@ workflow SMALL_GERMLINE_BENCHMARK { // tag meta and collect summary reports HAPPY_HAPPY.out.summary_csv - .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "happy"], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [benchmark_tool: "happy"], file) } .groupTuple() .set{ report } summary_reports = summary_reports.mix(report) diff --git a/subworkflows/local/small_somatic_benchmark.nf b/subworkflows/local/small_somatic_benchmark.nf index cfd2e99..78fa535 100644 --- a/subworkflows/local/small_somatic_benchmark.nf +++ b/subworkflows/local/small_somatic_benchmark.nf @@ -31,7 +31,7 @@ workflow SMALL_SOMATIC_BENCHMARK { versions = versions.mix(HAPPY_SOMPY.out.versions.first()) HAPPY_SOMPY.out.stats - .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "sompy"], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [benchmark_tool: "sompy"], file) } .groupTuple() .set{ report } summary_reports = summary_reports.mix(report) @@ -50,7 +50,7 @@ workflow SMALL_SOMATIC_BENCHMARK { versions = versions.mix(BAMSURGEON_EVALUATOR.out.versions.first()) BAMSURGEON_EVALUATOR.out.stats - .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "bamsurgeon"], file) } + .map { meta, file -> 
tuple([vartype: params.variant_type] + [benchmark_tool: "bamsurgeon"], file) } .groupTuple() .set{ report } summary_reports = summary_reports.mix(report) diff --git a/subworkflows/local/sv_germline_benchmark.nf b/subworkflows/local/sv_germline_benchmark.nf index f67e2d1..56fa2a3 100644 --- a/subworkflows/local/sv_germline_benchmark.nf +++ b/subworkflows/local/sv_germline_benchmark.nf @@ -36,7 +36,7 @@ workflow SV_GERMLINE_BENCHMARK { versions = versions.mix(TRUVARI_BENCH.out.versions.first()) TRUVARI_BENCH.out.summary - .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "truvari"], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [benchmark_tool: "truvari"], file) } .groupTuple() .set { report } @@ -50,7 +50,7 @@ workflow SV_GERMLINE_BENCHMARK { versions = versions.mix(VCF_REHEADER_SAMPLENAME_1.out.versions) VCF_REHEADER_SAMPLENAME_1.out.ch_vcf - .map { meta, file, index -> tuple([vartype: meta.vartype] + [tag: "FN"] + [id: "truvari"], file) } + .map { meta, file, index -> tuple([vartype: params.variant_type] + [tag: "FN"] + [id: "truvari"], file) } .set { vcf_fn } // reheader fp vcf files for tagged results @@ -62,7 +62,7 @@ workflow SV_GERMLINE_BENCHMARK { // add tag and to meta VCF_REHEADER_SAMPLENAME_2.out.ch_vcf - .map { meta, file, index -> tuple([vartype: meta.vartype] + [tag: "FP"] + [id: "truvari"], file) } + .map { meta, file, index -> tuple([vartype: params.variant_type] + [tag: "FP"] + [id: "truvari"], file) } .set { vcf_fp } // reheader base tp vcf files for tagged results @@ -74,7 +74,7 @@ workflow SV_GERMLINE_BENCHMARK { // add tag and to meta VCF_REHEADER_SAMPLENAME_3.out.ch_vcf - .map { meta, file, index -> tuple([vartype: meta.vartype] + [tag: "TP_base"] + [id: "truvari"], file) } + .map { meta, file, index -> tuple([vartype: params.variant_type] + [tag: "TP_base"] + [id: "truvari"], file) } .set { vcf_tp_base } // reheader comp tp vcf files for tagged results @@ -86,7 +86,7 @@ workflow SV_GERMLINE_BENCHMARK 
{ // add tag and to meta VCF_REHEADER_SAMPLENAME_4.out.ch_vcf - .map { meta, file, index -> tuple([vartype: meta.vartype] + [tag: "TP_comp"] + [id: "truvari"], file) } + .map { meta, file, index -> tuple([vartype: params.variant_type] + [tag: "TP_comp"] + [id: "truvari"], file) } .set { vcf_tp_comp } // collect tagged variant files @@ -108,7 +108,7 @@ workflow SV_GERMLINE_BENCHMARK { // tag and collect summary file SVANALYZER_SVBENCHMARK.out.report - .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "svbenchmark"], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [benchmark_tool: "svbenchmark"], file) } .groupTuple() .set{ report } @@ -116,12 +116,12 @@ workflow SV_GERMLINE_BENCHMARK { // reheader fn vcf files for tagged results SVANALYZER_SVBENCHMARK.out.fns - .map { meta, file -> tuple([vartype: meta.vartype] + [tag: "FN"] + [id: "svbenchmark"], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [tag: "FN"] + [id: "svbenchmark"], file) } .set { vcf_fn } SVANALYZER_SVBENCHMARK.out.fps - .map { meta, file -> tuple([vartype: meta.vartype] + [tag: "FP"] + [id: "svbenchmark"], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [tag: "FP"] + [id: "svbenchmark"], file) } .set { vcf_fp } tagged_variants = tagged_variants.mix( @@ -163,7 +163,7 @@ workflow SV_GERMLINE_BENCHMARK { versions = versions.mix(WITTYER.out.versions.first()) WITTYER.out.report - .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "wittyer"], file) } + .map { meta, file -> tuple([vartype: params.variant_type] + [benchmark_tool: "wittyer"], file) } .groupTuple() .set{ report } summary_reports = summary_reports.mix(report) diff --git a/subworkflows/local/sv_vcf_conversion.nf b/subworkflows/local/sv_vcf_conversion.nf index ec3717d..c92e214 100644 --- a/subworkflows/local/sv_vcf_conversion.nf +++ b/subworkflows/local/sv_vcf_conversion.nf @@ -58,7 +58,7 @@ workflow SV_VCF_CONVERSIONS { tool: supported return 
[ meta, vcf, tbi] other: !supported - return [ meta, vcf, tbi ] + return [ meta, vcf ] } .set{input} @@ -80,16 +80,11 @@ workflow SV_VCF_CONVERSIONS { .map{ def meta = it[0] def vcf = it[1] - def tbi = it[2] - [ meta, vcf, tbi ] + [ meta, vcf ] } .set { vcf_ch } - out_vcf_ch = out_vcf_ch.mix(SVYNC.out.vcf, - input.other) - vcf_ch = out_vcf_ch.map{it -> tuple(it[0], it[1], it[2])} } - emit: vcf_ch versions diff --git a/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf b/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf index 56e2038..2336434 100644 --- a/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf @@ -159,42 +159,42 @@ def getGenomeAttribute(attribute) { // def getTruthAttribute(attribute) { if (!params.itruth_ignore){ - if (params.sample && params.analysis && params.genome) { + if (params.truth_id && params.analysis && params.genome) { if (params.liftover){ if (params.genome == "GRCh38"){ if (params.analysis == "somatic"){ - if (params.somatic[ "GRCh37" ][ params.sample ].containsKey(attribute)){ - return params.somatic[ "GRCh37" ][ params.sample ][ attribute ] + if (params.somatic[ "GRCh37" ][ params.truth_id ].containsKey(attribute)){ + return params.somatic[ "GRCh37" ][ params.truth_id ][ attribute ] } } else if(params.analysis == "germline") { - if (params.germline[ "GRCh37" ][ params.sample ].containsKey(attribute)){ - return params.germline[ "GRCh37" ][ params.sample ][ attribute ] + if (params.germline[ "GRCh37" ][ params.truth_id ].containsKey(attribute)){ + return params.germline[ "GRCh37" ][ params.truth_id ][ attribute ] } } } else if (params.genome == "GRCh37"){ if (params.analysis == "somatic"){ - if (params.somatic[ "GRCh38" ][ params.sample ].containsKey(attribute)){ - return params.somatic[ "GRCh38" ][ params.sample ][ attribute ] + if (params.somatic[ "GRCh38" ][ params.truth_id ].containsKey(attribute)){ + 
return params.somatic[ "GRCh38" ][ params.truth_id ][ attribute ] } } else if(params.analysis == "germline") { - if (params.germline[ "GRCh38" ][ params.sample ].containsKey(attribute)){ - return params.germline[ "GRCh38" ][ params.sample ][ attribute ] + if (params.germline[ "GRCh38" ][ params.truth_id ].containsKey(attribute)){ + return params.germline[ "GRCh38" ][ params.truth_id ][ attribute ] } } } } else{ if (params.analysis == "somatic"){ - if (params.somatic[ params.genome ][ params.sample ].containsKey(attribute)){ - return params.somatic[ params.genome ][ params.sample ][ attribute ] + if (params.somatic[ params.genome ][ params.truth_id ].containsKey(attribute)){ + return params.somatic[ params.genome ][ params.truth_id ][ attribute ] } } else if(params.analysis == "germline") { - if (params.germline[ params.genome ][ params.sample ].containsKey(attribute)){ - return params.germline[ params.genome][ params.sample ][ attribute ] + if (params.germline[ params.genome ][ params.truth_id ].containsKey(attribute)){ + return params.germline[ params.genome][ params.truth_id ][ attribute ] } } diff --git a/subworkflows/local/vcf_variant_filtering.nf b/subworkflows/local/vcf_variant_filtering.nf index 78c27a2..fda853a 100644 --- a/subworkflows/local/vcf_variant_filtering.nf +++ b/subworkflows/local/vcf_variant_filtering.nf @@ -33,30 +33,21 @@ workflow VCF_VARIANT_FILTERING { vcf_ch = BCFTOOLS_FILTER.out.vcf } - out_vcf_ch = Channel.empty() - if(params.min_sv_size > 0 | params.max_sv_size != -1 | params.min_allele_freq != -1 | params.min_num_reads != -1){ - vcf_ch.branch{ - def meta = it[0] - sv: meta.vartype == "sv" - other: true} - .set{vcf} // filters out smaller SVs than min_sv_size, only applicable to SV files - SURVIVOR_FILTER( - vcf.sv.map{meta, vcf -> tuple( meta, vcf, [])}, - params.min_sv_size, - params.max_sv_size, - params.min_allele_freq, - params.min_num_reads - ) - versions = versions.mix(SURVIVOR_FILTER.out.versions.first()) - - out_vcf_ch = 
out_vcf_ch.mix(SURVIVOR_FILTER.out.vcf, - vcf.other) - vcf_ch = out_vcf_ch + if (params.variant_type == "structural"){ + SURVIVOR_FILTER( + vcf_ch.map{meta, vcf -> tuple( meta, vcf, [])}, + params.min_sv_size, + params.max_sv_size, + params.min_allele_freq, + params.min_num_reads + ) + versions = versions.mix(SURVIVOR_FILTER.out.versions.first()) + vcf_ch = SURVIVOR_FILTER.out.vcf + } } - // zip and index vcf files TABIX_BGZIPTABIX( vcf_ch diff --git a/workflows/variantbenchmarking.nf b/workflows/variantbenchmarking.nf index eca9072..734bb00 100644 --- a/workflows/variantbenchmarking.nf +++ b/workflows/variantbenchmarking.nf @@ -54,11 +54,6 @@ workflow VARIANTBENCHMARKING { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() ch_reports = Channel.empty() - truth_ch = Channel.empty() - high_conf_ch = Channel.empty() - bench_ch = Channel.empty() - sv_evals_ch = Channel.empty() - small_evals_ch = Channel.empty() //// create reference channels //// @@ -67,56 +62,16 @@ workflow VARIANTBENCHMARKING { fai = Channel.fromPath(params.fai, checkIfExists: true) .map{ fai -> tuple([id: fai.getSimpleName()], fai) }.collect() - //// check high confidence files //// + //// check Truth Files //// - // Germline + truth_ch = Channel.fromPath(params.truth_vcf, checkIfExists: true) + .map{ vcf -> tuple([id: params.truth_id, vartype:params.variant_type], vcf) }.collect() - truth_small = params.truth_small ? Channel.fromPath(params.truth_small, checkIfExists: true).map{ vcf -> tuple([id: params.sample, vartype:"small"], vcf) }.collect() + regions_bed_ch = params.regions_bed ? Channel.fromPath(params.regions_bed, checkIfExists: true).collect() : Channel.empty() - truth_ch = truth_ch.mix(truth_small) - - high_conf_small = params.high_conf_small ? 
Channel.fromPath(params.high_conf_small, checkIfExists: true).map{ bed -> tuple([id: params.sample, vartype:"small"], bed) }.collect() - : Channel.empty() - high_conf_ch = high_conf_ch.mix(high_conf_small) - - truth_sv = params.truth_sv ? Channel.fromPath(params.truth_sv, checkIfExists: true).map{ vcf -> tuple([id: params.sample, vartype:"sv"], vcf) }.collect() - : Channel.empty() - truth_ch = truth_ch.mix(truth_sv) - - high_conf_sv = params.high_conf_sv ? Channel.fromPath(params.high_conf_sv, checkIfExists: true).map{ bed -> tuple([id: params.sample, vartype:"sv"], bed) }.collect() - : Channel.empty() - high_conf_ch = high_conf_ch.mix(high_conf_sv) - - truth_cnv = params.truth_cnv ? Channel.fromPath(params.truth_cnv, checkIfExists: true).map{ vcf -> tuple([id: params.sample, vartype:"cnv"], vcf) }.collect() - : Channel.empty() - truth_ch = truth_ch.mix(truth_cnv) - - high_conf_cnv = params.high_conf_cnv ? Channel.fromPath(params.high_conf_cnv, checkIfExists: true).map{ bed -> tuple([id: params.sample, vartype:"cnv"], bed) }.collect() - : Channel.empty() - high_conf_ch = high_conf_ch.mix(high_conf_cnv) - - // Somatic - // snv and indel seperation only possible for somatic cases - - truth_snv = params.truth_snv ? Channel.fromPath(params.truth_snv, checkIfExists: true).map{ vcf -> tuple([id: params.sample, vartype:"snv"], vcf) }.collect() - : Channel.empty() - truth_ch = truth_ch.mix(truth_snv) - - high_conf_snv = params.high_conf_snv ? Channel.fromPath(params.high_conf_snv, checkIfExists: true).map{ bed -> tuple([id: params.sample, vartype:"snv"], bed) }.collect() - : Channel.empty() - high_conf_ch = high_conf_ch.mix(high_conf_snv) - - truth_indel = params.truth_indel ? Channel.fromPath(params.truth_indel, checkIfExists: true).map{ vcf -> tuple([id: params.sample, vartype:"indel"], vcf) }.collect() - : Channel.empty() - truth_ch = truth_ch.mix(truth_indel) - - high_conf_indel = params.high_conf_indel ? 
Channel.fromPath(params.high_conf_indel, checkIfExists: true).map{ bed -> tuple([id: params.sample, vartype:"indel"], bed) }.collect() - : Channel.empty() - high_conf_ch = high_conf_ch.mix(high_conf_indel) - // SDF file for RTG-tools eval - sdf = params.sdf ? Channel.fromPath(params.sdf, checkIfExists: true).map{ sdf -> tuple([id: sdf.getSimpleName()], sdf) }.collect() + sdf = params.sdf ? Channel.fromPath(params.sdf, checkIfExists: true).map{ sdf -> tuple([id: sdf.getSimpleName()], sdf) }.collect() : Channel.empty() // read chainfile, liftover genome and rename chr files if liftover is true @@ -145,7 +100,6 @@ workflow VARIANTBENCHMARKING { out_vcf_ch = Channel.empty() - input.multisample.view() SUBSAMPLE_VCF_TEST( input.multisample ) @@ -154,27 +108,20 @@ workflow VARIANTBENCHMARKING { input.other) vcf_ch = out_vcf_ch - // Branch out according to the analysis - vcf_ch.branch{ - sv: it[0].vartype == "sv" - other: true} - .set{test_ch} - - out_vcf_ch = Channel.empty() - - // Standardize SV VCFs, tool spesific modifications - SV_VCF_CONVERSIONS( - test_ch.sv, - fasta, - fai - ) - ch_versions = ch_versions.mix(SV_VCF_CONVERSIONS.out.versions) - out_vcf_ch = out_vcf_ch.mix(SV_VCF_CONVERSIONS.out.vcf_ch.map{it -> tuple(it[0], it[1])}, - test_ch.other) + if (params.variant_type == "structural"){ + // Standardize SV VCFs, tool spesific modifications + SV_VCF_CONVERSIONS( + vcf_ch, + fasta, + fai + ) + ch_versions = ch_versions.mix(SV_VCF_CONVERSIONS.out.versions) + vcf_ch = SV_VCF_CONVERSIONS.out.vcf_ch.map{it -> tuple(it[0], it[1])} + } // Prepare and normalize input vcfs PREPARE_VCFS_TEST( - out_vcf_ch, + vcf_ch, fasta, fai ) @@ -183,14 +130,14 @@ workflow VARIANTBENCHMARKING { // Prepare and normalize truth vcfs PREPARE_VCFS_TRUTH( truth_ch, - high_conf_ch, + regions_bed_ch, fasta, fai, chain, rename_chr, dictionary ) - high_conf_ch = PREPARE_VCFS_TRUTH.out.high_conf_ch + regions_bed_ch = PREPARE_VCFS_TRUTH.out.high_conf_ch ch_versions = 
ch_versions.mix(PREPARE_VCFS_TRUTH.out.versions) // VCF REPORTS AND STATS @@ -201,204 +148,73 @@ workflow VARIANTBENCHMARKING { ) ch_versions = ch_versions.mix(REPORT_VCF_STATISTICS.out.versions) - // branch out input test files - PREPARE_VCFS_TEST.out.vcf_ch.branch{ - def meta = it[0] - sv: meta.vartype == "sv" - small: meta.vartype == "small" - cnv: meta.vartype == "cnv" - snv: meta.vartype == "snv" - indel: meta.vartype == "indel" - other: false - } - .set{test} + // Prepare benchmark channel + PREPARE_VCFS_TEST.out.vcf_ch.combine(PREPARE_VCFS_TRUTH.out.vcf_ch) + .combine(regions_bed_ch.ifEmpty([[]])) + .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi, high_bed -> + [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, high_bed ]} + .set{bench} - // branch out truth vcf files - PREPARE_VCFS_TRUTH.out.vcf_ch.branch{ - def meta = it[0] - sv: meta.vartype == "sv" - small: meta.vartype == "small" - cnv: meta.vartype == "cnv" - snv: meta.vartype == "snv" - indel: meta.vartype == "indel" - other: false - } - .set{truth} + evals_ch = Channel.empty() - // branch out high confidence bed files - high_conf_ch.branch{ - def meta = it[0] - sv: meta.vartype == "sv" - small: meta.vartype == "small" - cnv: meta.vartype == "cnv" - snv: meta.vartype == "snv" - indel: meta.vartype == "indel" - other: false - } - .set{high_conf} - - // prepare benchmark sets - if(params.truth_small){ - if(params.high_conf_small){ - test.small.combine(truth.small) - .combine(high_conf.small) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi, high_meta, high_bed -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, high_bed ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - else{ - test.small.combine(truth.small) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, [] ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - } - if(params.truth_sv){ - 
if(params.high_conf_sv){ - test.sv.combine(truth.sv) - .combine(high_conf.sv) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi, high_meta, high_bed -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, high_bed ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - else{ - test.sv.combine(truth.sv) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, [] ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - } - if(params.truth_cnv){ - if(params.high_conf_cnv){ - test.cnv.combine(truth.cnv) - .combine(high_conf.cnv) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi, high_meta, high_bed -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, high_bed ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - else{ - test.cnv.combine(truth.cnv) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, [] ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - } - if(params.truth_snv){ - if(params.high_conf_snv){ - test.snv.combine(truth.snv) - .combine(high_conf.snv) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi, high_meta, high_bed -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, high_bed ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - else{ - test.snv.combine(truth.snv) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, [] ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - } - if(params.truth_indel){ - if(params.high_conf_indel){ - test.indel.combine(truth.indel) - .combine(high_conf.indel) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi, high_meta, high_bed -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, high_bed ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } - else{ - 
test.indel.combine(truth.indel) - .map{ test_meta, test_vcf, test_tbi, truth_meta, truth_vcf, truth_tbi -> - [ test_meta, test_vcf, test_tbi, truth_vcf, truth_tbi, [] ] - } - .set{bench} - bench_ch = bench_ch.mix(bench) - } + if (params.variant_type == "structural"){ + // Perform SV benchmarking - for now it also works for somatic cases! + // this part will be changed! + SV_GERMLINE_BENCHMARK( + bench, + fasta, + fai + ) + ch_versions = ch_versions.mix(SV_GERMLINE_BENCHMARK.out.versions) + ch_reports = ch_reports.mix(SV_GERMLINE_BENCHMARK.out.summary_reports) + evals_ch = evals_ch.mix(SV_GERMLINE_BENCHMARK.out.tagged_variants) } - // branch out combined benchmark sets - bench_ch.branch{ - def meta = it[0] - sv: meta.vartype == "sv" - small: meta.vartype == "small" - cnv: meta.vartype == "cnv" - snv: meta.vartype == "snv" - indel: meta.vartype == "indel" - other: false - } - .set{bench_input} - - - // Perform SV benchmarking - for now it also works for somatic cases! - SV_GERMLINE_BENCHMARK( - bench_input.sv, - fasta, - fai - ) - ch_versions = ch_versions.mix(SV_GERMLINE_BENCHMARK.out.versions) - ch_reports = ch_reports.mix(SV_GERMLINE_BENCHMARK.out.summary_reports) - sv_evals_ch = sv_evals_ch.mix(SV_GERMLINE_BENCHMARK.out.tagged_variants) - if (params.analysis.contains("germline")){ - // Benchmarking spesific to small germline samples - SMALL_GERMLINE_BENCHMARK( - bench_input.small.mix(bench_input.snv,bench_input.indel), - fasta, - fai, - sdf - ) - ch_versions = ch_versions.mix(SMALL_GERMLINE_BENCHMARK.out.versions) - ch_reports = ch_reports.mix(SMALL_GERMLINE_BENCHMARK.out.summary_reports) - small_evals_ch = small_evals_ch.mix(SMALL_GERMLINE_BENCHMARK.out.tagged_variants) + if (params.variant_type == "small" | params.variant_type == "snv" | params.variant_type == "indel"){ + // Benchmarking specific to small germline samples + SMALL_GERMLINE_BENCHMARK( + bench, + fasta, + fai, + sdf + ) + ch_versions = ch_versions.mix(SMALL_GERMLINE_BENCHMARK.out.versions) +
ch_reports = ch_reports.mix(SMALL_GERMLINE_BENCHMARK.out.summary_reports) + evals_ch = evals_ch.mix(SMALL_GERMLINE_BENCHMARK.out.tagged_variants) + } - // Benchmarking spesific to CNV germline samples - CNV_GERMLINE_BENCHMARK( - bench_input.cnv, - fasta, - fai - ) - ch_versions = ch_versions.mix(CNV_GERMLINE_BENCHMARK.out.versions) - ch_reports = ch_reports.mix(CNV_GERMLINE_BENCHMARK.out.summary_reports) + if (params.variant_type == "copynumber"){ + // Benchmarking spesific to CNV germline samples + CNV_GERMLINE_BENCHMARK( + bench, + fasta, + fai + ) + ch_versions = ch_versions.mix(CNV_GERMLINE_BENCHMARK.out.versions) + ch_reports = ch_reports.mix(CNV_GERMLINE_BENCHMARK.out.summary_reports) + } } // TODO: SOMATIC BENCHMARKING if (params.analysis.contains("somatic")){ - somatic_small = bench_input.snv.mix(bench_input.indel) - // SOMATIC VARIANT BENCHMARKING - SMALL_SOMATIC_BENCHMARK( - somatic_small, - fasta, - fai - ) - ch_versions = ch_versions.mix(SMALL_SOMATIC_BENCHMARK.out.versions) - ch_reports = ch_reports.mix(SMALL_SOMATIC_BENCHMARK.out.summary_reports) + if (params.variant_type == "snv" | params.variant_type == "indel"){ + // SOMATIC VARIANT BENCHMARKING + SMALL_SOMATIC_BENCHMARK( + bench, + fasta, + fai + ) + ch_versions = ch_versions.mix(SMALL_SOMATIC_BENCHMARK.out.versions) + ch_reports = ch_reports.mix(SMALL_SOMATIC_BENCHMARK.out.summary_reports) + } } - // compare tool spesfic benchmarks COMPARE_BENCHMARK_RESULTS( - small_evals_ch, - sv_evals_ch, + evals_ch, fasta, fai ) From d080536662b48f174c09776a4c5150d34166c874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Wed, 6 Nov 2024 15:05:39 +0000 Subject: [PATCH 2/4] remove itruth --- conf/igenomes.config | 1 + conf/test_full.config | 12 +++-- main.nf | 20 ------- nextflow.config | 22 ++++---- nextflow_schema.json | 11 +--- .../main.nf | 52 ------------------- 6 files changed, 19 insertions(+), 99 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index 
cd55be5..188d4a2 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -37,6 +37,7 @@ params { mito_name = "chrM" macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } 'CHM13' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" diff --git a/conf/test_full.config b/conf/test_full.config index dd92cdd..1e44e45 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -20,17 +20,19 @@ params { max_time = '8.h' // Input data - input = null + input = 'assets/samplesheet_small.csv' outdir = 'results' // Genome references genome = 'GRCh37' analysis = 'germline' truth_id = "HG002" - - method = 'truvari,svanalyzer,happy,rtgtools' - + variant_type = "small" + method = 'happy,rtgtools' preprocess = "normalization,deduplication,filter_contigs" - min_sv_size = 30 + + truth_vcf = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh37/SupplementaryFiles/HG002_GRCh37_1_22_v4.2.1_highconf.vcf.gz" + regions_bed = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh37/SupplementaryFiles/HG002_GRCh37_1_22_v4.2.1_highconf.bed" + itruth_ignore = true } diff --git a/main.nf b/main.nf index 460f496..2e63537 100644 --- a/main.nf +++ b/main.nf @@ -19,26 +19,6 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_vari params.fasta = getGenomeAttribute('fasta') params.fai = getGenomeAttribute('fai') -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - TRUTH PARAMETER VALUES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -include { getTruthAttribute } from './subworkflows/local/utils_nfcore_variantbenchmarking_pipeline' - -params.truth_small = getTruthAttribute('truth_small') -params.high_conf_small = getTruthAttribute('high_conf_small') -params.truth_sv = 
getTruthAttribute('truth_sv') -params.high_conf_sv = getTruthAttribute('high_conf_sv') -params.truth_cnv = getTruthAttribute('truth_cnv') -params.high_conf_cnv = getTruthAttribute('high_conf_cnv') -params.truth_snv = getTruthAttribute('truth_snv') -params.high_conf_snv = getTruthAttribute('high_conf_snv') -params.truth_indel = getTruthAttribute('truth_indel') -params.high_conf_indel = getTruthAttribute('high_conf_indel') -params.chain = getTruthAttribute('chain') -params.rename_chr = getTruthAttribute('rename_chr') - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS diff --git a/nextflow.config b/nextflow.config index f8484a8..c887d6a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,9 +46,6 @@ params { dictionary = null liftover = false - // Truth library - itruth_ignore = false - // MultiQC options multiqc_config = null multiqc_title = null @@ -209,16 +206,6 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' - - -// Load truth.config if required -if (!params.itruth_ignore) { - includeConfig 'conf/truth.config' -} else { - params.somatic = [:] - params.germline = [:] - -} // Load igenomes.config if required includeConfig !params.igenomes_ignore ? 
'conf/igenomes.config' : 'conf/igenomes_ignored.config' @@ -264,6 +251,14 @@ dag { enabled = true file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } +prov { + enabled = true + formats { + bco { + file = "${params.outdir}/pipeline_info/manifest_${trace_timestamp}.bco.json" + } + } +} manifest { name = 'nf-core/variantbenchmarking' @@ -279,6 +274,7 @@ manifest { // Nextflow plugins plugins { id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-prov@1.2.2' // Provenance reports for pipeline runs } validation { diff --git a/nextflow_schema.json b/nextflow_schema.json index 8ff5b2b..c9d2a76 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -196,13 +196,6 @@ "hidden": true, "default": "s3://ngi-igenomes/igenomes/" }, - "itruth_ignore": { - "type": "boolean", - "description": "Do not load the truth reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `truth.config` when running the pipeline. In this case user need to provide necessary truth files. You may choose this option if you observe clashes between custom parameters and those supplied in `truth.config`." - }, "liftover": { "type": "boolean", "description": "Run liftover workflow", @@ -216,7 +209,7 @@ "exists": true, "pattern": "^\\S+\\.(chain|bed)?(\\.gz)?$", "description": "Path to the chain file sey required for liftover.", - "help_text": "This parameter is *mandatory* if `--liftover` is true and --genome is not specified and --itruth is false. 
A chain can be found under itruth files and can be used together with --genome.", + "help_text": "This parameter is *mandatory* if `--liftover` is true", "fa_icon": "fas fa-file-csv" }, "rename_chr": { @@ -226,7 +219,7 @@ "mimetype": "text/plain", "pattern": "^\\S+\\.txt$", "description": "Path to the ranaming chromosomes for lifting over.", - "help_text": "This parameter is *mandatory* if `--liftover` is true and --genome is not specified and --itruth is false. A rename can be found under assets/rename_contigs or can be used together with --genome.", + "help_text": "This parameter is *mandatory* if `--liftover` is true", "fa_icon": "far fa-file-code" }, "dictionary": { diff --git a/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf b/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf index 2336434..4d516ea 100644 --- a/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_variantbenchmarking_pipeline/main.nf @@ -154,58 +154,6 @@ def getGenomeAttribute(attribute) { return null } -// -// Get attribute from truth config file e.g. 
truth_small -// -def getTruthAttribute(attribute) { - if (!params.itruth_ignore){ - if (params.truth_id && params.analysis && params.genome) { - if (params.liftover){ - if (params.genome == "GRCh38"){ - if (params.analysis == "somatic"){ - if (params.somatic[ "GRCh37" ][ params.truth_id ].containsKey(attribute)){ - return params.somatic[ "GRCh37" ][ params.truth_id ][ attribute ] - } - } - else if(params.analysis == "germline") { - if (params.germline[ "GRCh37" ][ params.truth_id ].containsKey(attribute)){ - return params.germline[ "GRCh37" ][ params.truth_id ][ attribute ] - } - } - } - else if (params.genome == "GRCh37"){ - if (params.analysis == "somatic"){ - if (params.somatic[ "GRCh38" ][ params.truth_id ].containsKey(attribute)){ - return params.somatic[ "GRCh38" ][ params.truth_id ][ attribute ] - } - } - else if(params.analysis == "germline") { - if (params.germline[ "GRCh38" ][ params.truth_id ].containsKey(attribute)){ - return params.germline[ "GRCh38" ][ params.truth_id ][ attribute ] - } - } - } - } - else{ - if (params.analysis == "somatic"){ - if (params.somatic[ params.genome ][ params.truth_id ].containsKey(attribute)){ - return params.somatic[ params.genome ][ params.truth_id ][ attribute ] - } - } - else if(params.analysis == "germline") { - if (params.germline[ params.genome ][ params.truth_id ].containsKey(attribute)){ - return params.germline[ params.genome][ params.truth_id ][ attribute ] - } - } - - } - } - else{ - return null - } - } -} - // // Exit pipeline if incorrect --genome key provided // From dedd5eb13881e08e70062922a4df3002f068695f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Wed, 6 Nov 2024 15:09:22 +0000 Subject: [PATCH 3/4] test profile for github actions --- conf/test.config | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 conf/test.config diff --git a/conf/test.config b/conf/test.config new file mode 100644 index 0000000..5c34a7a --- /dev/null +++ 
b/conf/test.config @@ -0,0 +1,43 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/variantbenchmarking -profile test,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = "assets/samplesheet_small.csv" + outdir = "results" + + // Genome references + genome = 'GRCh38' + + // Processes + analysis = 'germline' + variant_type = "small" + method = 'happy,rtgtools' + preprocess = "normalization,deduplication,prepy" + include_expression = 'FILTER="."' + + // truth information + truth_id = "HG002" + truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/CRMG_benchmark_dataset/HG002_GRCh38_CMRG_smallvar_v1.00.vcf.gz" + regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/CRMG_benchmark_dataset/HG002_GRCh38_CMRG_smallvar_v1.00.bed" +} From bf091c6fcd02e4a25db191fbaadb16449fa6bfd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Wed, 6 Nov 2024 15:21:26 +0000 Subject: [PATCH 4/4] fix linting --- nextflow.config | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c887d6a..ce30578 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,9 +21,11 @@ params { // Truth parameters truth_id = null + truth_vcf = null + regions_bed = null // Benchmarking method - method =
'truvari,svanalyzer,happy,rtgtools,wittyer,sompy' // Preprocess spesific parameters preprocess = "" @@ -44,6 +46,8 @@ params { igenomes_ignore = false sdf = null dictionary = null + rename_chr = null + chain = null liftover = false // MultiQC options