Skip to content

Commit

Permalink
fix filtering
Browse files (browse the repository at this point in the history)
  • Loading branch information
kubranarci committed Jan 13, 2025
1 parent be29a99 commit ce8c7d7
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 88 deletions.
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ params {
config_profile_description = 'Minimal test dataset to check pipeline function'

// Input data
input = "assets/samplesheet_small.csv"
input = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/samplesheet_small.csv"
outdir = "results"

// Genome references
Expand Down
2 changes: 1 addition & 1 deletion conf/tests/germline_small.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ params {
variant_type = "small"
method = 'happy,rtgtools'
preprocess = "normalization,deduplication,prepy"
include_expression = 'FILTER="."'
include_expression = '(ILEN >= -5 && ILEN <= 5)'

// truth information
truth_id = "HG002"
Expand Down
2 changes: 1 addition & 1 deletion conf/tests/somatic_snv.config
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ params {
truth_id = "SEQC2"
truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/truth/hg38/sSNV_truth_set_v1.0.chr21.vcf.gz"
regions_bed = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/truth/hg38/high-confidence_sSNV_in_HC_regions_v1.2.chr21.vcf.gz"
rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch37_grch38.txt"
rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch37_grch38.txt"

}
2 changes: 1 addition & 1 deletion conf/tests/somatic_sv.config
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ params {

truth_id = "SEQC2"
truth_vcf = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/truth/hg38/13059_2022_2816_MOESM4_ESM.vcf.gz"
rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch37_grch38.txt"
rename_chr = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/grch37_grch38.txt"

}
17 changes: 9 additions & 8 deletions subworkflows/local/vcf_variant_filtering.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,7 @@ workflow VCF_VARIANT_FILTERING {

versions = Channel.empty()

// unzip vcf file, required for survivor filter
TABIX_BGZIP(
vcf_ch.map{ meta, vcf, index -> tuple(meta, vcf)}
)
versions = versions.mix(TABIX_BGZIP.out.versions.first())
vcf_ch = TABIX_BGZIP.out.output

if(params.exclude_expression != null & params.include_expression != null){
if(params.exclude_expression != null | params.include_expression != null){

// filter vcf files using bcftools expressions
BCFTOOLS_FILTER(
Expand All @@ -32,6 +25,14 @@ workflow VCF_VARIANT_FILTERING {
versions = versions.mix(BCFTOOLS_FILTER.out.versions.first())
vcf_ch = BCFTOOLS_FILTER.out.vcf
}
else{
// unzip vcf file, required for survivor filter
TABIX_BGZIP(
vcf_ch.map{ meta, vcf, index -> tuple(meta, vcf)}
)
versions = versions.mix(TABIX_BGZIP.out.versions.first())
vcf_ch = TABIX_BGZIP.out.output
}

if(params.min_sv_size > 0 | params.max_sv_size != -1 | params.min_allele_freq != -1 | params.min_num_reads != -1){

Expand Down
66 changes: 35 additions & 31 deletions tests/germline_small.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
"BCFTOOLS_DEDUP": {
"bcftools": 1.18
},
"BCFTOOLS_FILTER": {
"bcftools": 1.2
},
"BCFTOOLS_MERGE": {
"bcftools": 1.2
},
Expand Down Expand Up @@ -45,9 +48,6 @@
"RTGTOOLS_VCFEVAL": {
"rtg-tools": "3.12.1"
},
"TABIX_BGZIP": {
"tabix": "1.19.1"
},
"TABIX_TABIX": {
"tabix": "1.19.1"
},
Expand Down Expand Up @@ -162,6 +162,7 @@
"small/test1/benchmarks/rtgtools/test1.HG002.strelka.tp.vcf.gz.tbi",
"small/test1/benchmarks/rtgtools/test1.HG002.strelka.weighted_roc.tsv.gz",
"small/test1/preprocess",
"small/test1/preprocess/HG002.strelka.variants.chr21.rh.norm.filter.vcf",
"small/test1/preprocess/test1.HG002.strelka.prepy.vcf.gz",
"small/test1/preprocess/test1.dedup.sort.vcf.gz",
"small/test1/stats",
Expand Down Expand Up @@ -196,6 +197,7 @@
"small/test2/benchmarks/rtgtools/test2.HG002.bcftools.tp.vcf.gz.tbi",
"small/test2/benchmarks/rtgtools/test2.HG002.bcftools.weighted_roc.tsv.gz",
"small/test2/preprocess",
"small/test2/preprocess/HG002.bcftools.chr21.rh.norm.filter.vcf",
"small/test2/preprocess/test2.HG002.bcftools.prepy.vcf.gz",
"small/test2/preprocess/test2.dedup.sort.vcf.gz",
"small/test2/stats",
Expand Down Expand Up @@ -228,10 +230,10 @@
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2024-12-10T10:55:37.880792825"
"timestamp": "2025-01-13T13:40:17.22451061"
},
"Params: --analysis 'germline' --variant_type 'small' --method 'happy,rtgtools'": {
"content": [
Expand All @@ -240,6 +242,9 @@
"BCFTOOLS_DEDUP": {
"bcftools": 1.18
},
"BCFTOOLS_FILTER": {
"bcftools": 1.2
},
"BCFTOOLS_MERGE": {
"bcftools": 1.2
},
Expand Down Expand Up @@ -279,9 +284,6 @@
"RTGTOOLS_VCFEVAL": {
"rtg-tools": "3.12.1"
},
"TABIX_BGZIP": {
"tabix": "1.19.1"
},
"TABIX_TABIX": {
"tabix": "1.19.1"
},
Expand Down Expand Up @@ -491,6 +493,7 @@
"small/test1/benchmarks/rtgtools/test1.HG002.strelka.tp.vcf.gz.tbi",
"small/test1/benchmarks/rtgtools/test1.HG002.strelka.weighted_roc.tsv.gz",
"small/test1/preprocess",
"small/test1/preprocess/HG002.strelka.variants.chr21.rh.norm.filter.vcf",
"small/test1/preprocess/test1.HG002.strelka.prepy.vcf.gz",
"small/test1/preprocess/test1.dedup.sort.vcf.gz",
"small/test1/stats",
Expand Down Expand Up @@ -525,6 +528,7 @@
"small/test2/benchmarks/rtgtools/test2.HG002.bcftools.tp.vcf.gz.tbi",
"small/test2/benchmarks/rtgtools/test2.HG002.bcftools.weighted_roc.tsv.gz",
"small/test2/preprocess",
"small/test2/preprocess/HG002.bcftools.chr21.rh.norm.filter.vcf",
"small/test2/preprocess/test2.HG002.bcftools.prepy.vcf.gz",
"small/test2/preprocess/test2.dedup.sort.vcf.gz",
"small/test2/stats",
Expand All @@ -549,32 +553,32 @@
"suffixdata0:md5,f2876dd730673cd49c4de191001f634e",
"suffixpointer0:md5,468281ffb10d7dd934289af762a03781",
"HG002.bcftools_stats.txt:md5,b215fc0030c53bc8887e28b23b97efb6",
"test1.HG002.strelka.extended.csv:md5,098dc352d1beeab27e72f97e64fc182e",
"test1.HG002.strelka.roc.Locations.INDEL.PASS.csv.gz:md5,bd1f1a1e138f75511de7aae157fa2fa4",
"test1.HG002.strelka.roc.Locations.INDEL.csv.gz:md5,8912022beb3c5b234d29300b764b56bc",
"test1.HG002.strelka.roc.Locations.SNP.PASS.csv.gz:md5,63279f14076a5a9919a4d30b0fbb7746",
"test1.HG002.strelka.roc.Locations.SNP.csv.gz:md5,77aedefc4516566b136acfba82bed147",
"test1.HG002.strelka.roc.all.csv.gz:md5,ccefcf1ff7ef04c9d97b8afbbcfb3c51",
"test1.HG002.strelka.summary.csv:md5,c14117d825b861657b38c058644896f9",
"test1.HG002.strelka.phasing.txt:md5,5ab9ffbd7c18a5512851086b6e7ecf59",
"test1.HG002.strelka.summary.txt:md5,e045d6a048bdec24cb55f9dd1b55ffe7",
"test1.strelka.bcftools_stats.txt:md5,f9f8cf5c1be9c88754888ef8cbfb06f4",
"test2.HG002.bcftools.extended.csv:md5,14392c3b5a1c060d63414cfc10e670b6",
"test2.HG002.bcftools.roc.Locations.INDEL.PASS.csv.gz:md5,507b54e5b6d1f956f557b39a7dc38ff4",
"test2.HG002.bcftools.roc.Locations.INDEL.csv.gz:md5,4694d6169ce0819d36768045a7ef43d0",
"test2.HG002.bcftools.roc.Locations.SNP.PASS.csv.gz:md5,83d1fa126072c16b0471fb8877c5c818",
"test2.HG002.bcftools.roc.Locations.SNP.csv.gz:md5,10045ce81f85c51e142a7ff30d7579bb",
"test2.HG002.bcftools.roc.all.csv.gz:md5,b176e3c9910ecf3209353682553c1a06",
"test2.HG002.bcftools.summary.csv:md5,3815ea18e8fc11f198a5821dee79a3c7",
"test1.HG002.strelka.extended.csv:md5,4362260b357ac0221414095f4c5a8981",
"test1.HG002.strelka.roc.Locations.INDEL.PASS.csv.gz:md5,a5ba4044a89ae80fb0ddf95147b5ae4c",
"test1.HG002.strelka.roc.Locations.INDEL.csv.gz:md5,52edef7d20ac8a7e03771037f5c93fe4",
"test1.HG002.strelka.roc.Locations.SNP.PASS.csv.gz:md5,f71e697d7ebaf1d670e5dc2c0e0106d3",
"test1.HG002.strelka.roc.Locations.SNP.csv.gz:md5,c388d5a15ba2ae8dff709b030f1b4828",
"test1.HG002.strelka.roc.all.csv.gz:md5,59ebbe78bf428b5c2c78c8ff92f54545",
"test1.HG002.strelka.summary.csv:md5,60af846379cf4fe078fbed6b9d1e8178",
"test1.HG002.strelka.phasing.txt:md5,838e67ae5b9cd9e218095596c03fbee3",
"test1.HG002.strelka.summary.txt:md5,e79779d3faebe02bae943ddc17c4cf91",
"test1.strelka.bcftools_stats.txt:md5,492f42090004470e7e0ea7abc5f89bdf",
"test2.HG002.bcftools.extended.csv:md5,2f8ef20f46c821333ba970e3034a6ccd",
"test2.HG002.bcftools.roc.Locations.INDEL.PASS.csv.gz:md5,cdf3fdb7c5b4c54d9896e37a99dbf4f9",
"test2.HG002.bcftools.roc.Locations.INDEL.csv.gz:md5,9b16abcfe483356020c550e1292554ed",
"test2.HG002.bcftools.roc.Locations.SNP.PASS.csv.gz:md5,f71e697d7ebaf1d670e5dc2c0e0106d3",
"test2.HG002.bcftools.roc.Locations.SNP.csv.gz:md5,c388d5a15ba2ae8dff709b030f1b4828",
"test2.HG002.bcftools.roc.all.csv.gz:md5,e772fefec84f9a6e60c6979bac14cedc",
"test2.HG002.bcftools.summary.csv:md5,05722d23f523141fdce842a18f1d8aa2",
"test2.HG002.bcftools.phasing.txt:md5,38920536b8c3e241e873c07ba61762e6",
"test2.HG002.bcftools.summary.txt:md5,b9a0c74a8b5af4f2c1c0623b61d7cbe3",
"test2.bcftools.bcftools_stats.txt:md5,57aff2f0a6f830e920869b987502a343"
"test2.HG002.bcftools.summary.txt:md5,a7bbcdf86cd3f1f7815ea9bc25b57b61",
"test2.bcftools.bcftools_stats.txt:md5,440fd66ee557b215bbacd05953215f7a"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2024-12-10T10:53:32.971378431"
"timestamp": "2025-01-13T14:00:31.553072869"
}
}
44 changes: 28 additions & 16 deletions tests/germline_sv.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
"BCFTOOLS_DEDUP": {
"bcftools": 1.18
},
"BCFTOOLS_FILTER": {
"bcftools": 1.2
},
"BCFTOOLS_NORM": {
"bcftools": 1.18
},
Expand Down Expand Up @@ -264,6 +267,7 @@
"structural/test1/preprocess/manta.HG002.chr21.norm.vcf.gz",
"structural/test1/preprocess/test1.dedup.sort.vcf.gz",
"structural/test1/preprocess/test1.manta.svync.vcf.gz",
"structural/test1/preprocess/test1.norm.filter.filter.vcf",
"structural/test1/preprocess/test1.norm.filter.vcf",
"structural/test1/stats",
"structural/test1/stats/bcftools",
Expand Down Expand Up @@ -296,6 +300,7 @@
"structural/test2/preprocess/Ashkenazim_HG002.filtered.sv.chr21.norm.sort.vcf.gz",
"structural/test2/preprocess/Ashkenazim_HG002.filtered.sv.chr21.norm.vcf.gz",
"structural/test2/preprocess/test2.dedup.sort.vcf.gz",
"structural/test2/preprocess/test2.norm.filter.filter.vcf",
"structural/test2/preprocess/test2.norm.filter.vcf",
"structural/test2/stats",
"structural/test2/stats/bcftools",
Expand Down Expand Up @@ -329,6 +334,7 @@
"structural/test3/preprocess/HG002_DRAGEN_SV_hg19.chr21.norm.vcf.gz",
"structural/test3/preprocess/test3.dedup.sort.vcf.gz",
"structural/test3/preprocess/test3.dragen.svync.vcf.gz",
"structural/test3/preprocess/test3.norm.filter.filter.vcf",
"structural/test3/preprocess/test3.norm.filter.vcf",
"structural/test3/stats",
"structural/test3/stats/bcftools",
Expand All @@ -339,25 +345,25 @@
[
"HG002.bcftools_stats.txt:md5,8294f172a72ca7219a32db9c27e2524c",
"HG002_mqc.stats:md5,68681df47b35e3193be03610f5c6e3d6",
"test1.HG002.manta.distances:md5,33dcbe29f7129dc30c184b564803ec63",
"test1.HG002.manta.report:md5,91b0fbfd6fcdf45b85290f92c5a470fb",
"test1.manta.bcftools_stats.txt:md5,0e4cf1707b245c026fe6a0af6e8f8c0a",
"test1.manta_mqc.stats:md5,6f25a9372095052218110390e8e75c54",
"test1.HG002.manta.distances:md5,346f18a5cbeece98716951c8fc2aaea4",
"test1.HG002.manta.report:md5,4a53712a9d15fa6dfe6ddd5848ca691c",
"test1.manta.bcftools_stats.txt:md5,7d65792aa3a84de09675facf62135c93",
"test1.manta_mqc.stats:md5,011ad66fec4287d32cb728c40e240c0b",
"test2.HG002.merged.distances:md5,346f18a5cbeece98716951c8fc2aaea4",
"test2.HG002.merged.report:md5,6466d73155f88084efc69cad3628eb8c",
"test2.merged.bcftools_stats.txt:md5,576f9bc7e06bc2bbfccceb9da161a467",
"test2.merged_mqc.stats:md5,36d80468e13b583d0761b434d095312d",
"test2.HG002.merged.report:md5,4a53712a9d15fa6dfe6ddd5848ca691c",
"test2.merged.bcftools_stats.txt:md5,1445742129b0ee67d8706af3dcf0ab2d",
"test2.merged_mqc.stats:md5,011ad66fec4287d32cb728c40e240c0b",
"test3.HG002.dragen.distances:md5,346f18a5cbeece98716951c8fc2aaea4",
"test3.HG002.dragen.report:md5,b295d4867b7a96f0ca0bb5e8bb45eb68",
"test3.dragen.bcftools_stats.txt:md5,0683a5d3c47a5a6a43ad2b6a0387672e",
"test3.dragen_mqc.stats:md5,5c59cceb2e7d909d91074bb8e5804abc"
"test3.HG002.dragen.report:md5,4a53712a9d15fa6dfe6ddd5848ca691c",
"test3.dragen.bcftools_stats.txt:md5,5d2b48ac5f194f5a2cf01b9623a28cce",
"test3.dragen_mqc.stats:md5,011ad66fec4287d32cb728c40e240c0b"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2024-12-10T10:42:14.227940401"
"timestamp": "2025-01-13T13:53:07.620717905"
},
"-stub": {
"content": [
Expand All @@ -366,6 +372,9 @@
"BCFTOOLS_DEDUP": {
"bcftools": 1.18
},
"BCFTOOLS_FILTER": {
"bcftools": 1.2
},
"BCFTOOLS_NORM": {
"bcftools": 1.18
},
Expand Down Expand Up @@ -563,6 +572,7 @@
"structural/test1/preprocess/manta.HG002.chr21.norm.vcf.gz",
"structural/test1/preprocess/test1.dedup.sort.vcf.gz",
"structural/test1/preprocess/test1.manta.svync.vcf.gz",
"structural/test1/preprocess/test1.norm.filter.filter.vcf",
"structural/test1/preprocess/test1.norm.filter.vcf",
"structural/test1/stats",
"structural/test1/stats/bcftools",
Expand Down Expand Up @@ -595,6 +605,7 @@
"structural/test2/preprocess/Ashkenazim_HG002.filtered.sv.chr21.norm.sort.vcf.gz",
"structural/test2/preprocess/Ashkenazim_HG002.filtered.sv.chr21.norm.vcf.gz",
"structural/test2/preprocess/test2.dedup.sort.vcf.gz",
"structural/test2/preprocess/test2.norm.filter.filter.vcf",
"structural/test2/preprocess/test2.norm.filter.vcf",
"structural/test2/stats",
"structural/test2/stats/bcftools",
Expand Down Expand Up @@ -628,6 +639,7 @@
"structural/test3/preprocess/HG002_DRAGEN_SV_hg19.chr21.norm.vcf.gz",
"structural/test3/preprocess/test3.dedup.sort.vcf.gz",
"structural/test3/preprocess/test3.dragen.svync.vcf.gz",
"structural/test3/preprocess/test3.norm.filter.filter.vcf",
"structural/test3/preprocess/test3.norm.filter.vcf",
"structural/test3/stats",
"structural/test3/stats/bcftools",
Expand All @@ -653,9 +665,9 @@
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2024-12-10T10:44:36.4725572"
"timestamp": "2025-01-13T13:55:36.985456265"
}
}
16 changes: 10 additions & 6 deletions tests/somatic_snv.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
"content": [
38,
{
"BCFTOOLS_FILTER": {
"bcftools": 1.2
},
"BCFTOOLS_REHEADER": {
"bcftools": 1.18
},
Expand Down Expand Up @@ -33,9 +36,6 @@
"PLOTS": {
"r-base": "4.3.1"
},
"TABIX_BGZIP": {
"tabix": "1.19.1"
},
"TABIX_BGZIPTABIX": {
"tabix": "1.19.1"
},
Expand Down Expand Up @@ -90,6 +90,8 @@
"snv/test1/benchmarks/sompy/test1.SEQC2.freebayes.features.csv",
"snv/test1/benchmarks/sompy/test1.SEQC2.freebayes.metrics.json",
"snv/test1/benchmarks/sompy/test1.SEQC2.freebayes.stats.csv",
"snv/test1/preprocess",
"snv/test1/preprocess/test1.filter.vcf",
"snv/test1/stats",
"snv/test1/stats/bcftools",
"snv/test1/stats/bcftools/test1.freebayes.bcftools_stats.txt",
Expand All @@ -101,6 +103,7 @@
"snv/test2/benchmarks/sompy/test2.SEQC2.manta.stats.csv",
"snv/test2/preprocess",
"snv/test2/preprocess/HCC1395T_vs_HCC1395N.manta.somatic_sv.sort.vcf.gz",
"snv/test2/preprocess/test2.filter.vcf",
"snv/test2/stats",
"snv/test2/stats/bcftools",
"snv/test2/stats/bcftools/test2.manta.bcftools_stats.txt",
Expand All @@ -112,6 +115,7 @@
"snv/test3/benchmarks/sompy/test3.SEQC2.strelka.stats.csv",
"snv/test3/preprocess",
"snv/test3/preprocess/HCC1395T_vs_HCC1395N.strelka.somatic_snvs.sort.vcf.gz",
"snv/test3/preprocess/test3.filter.vcf",
"snv/test3/stats",
"snv/test3/stats/bcftools",
"snv/test3/stats/bcftools/test3.strelka.bcftools_stats.txt"
Expand All @@ -124,9 +128,9 @@
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2024-12-10T13:54:36.607079161"
"timestamp": "2025-01-13T14:15:33.922876808"
}
}
Loading

0 comments on commit ce8c7d7

Please sign in to comment.