Skip to content

Commit

Permalink
Merge pull request #103 from nf-core/100-refactoring-input-truth-files
Browse files (browse the repository at this point in the history)
100 refactoring input truth files
  • Loading branch information
kubranarci authored Nov 6, 2024
2 parents 14812e9 + bf091c6 commit 0ca66e3
Show file tree
Hide file tree
Showing 37 changed files with 374 additions and 763 deletions.
18 changes: 16 additions & 2 deletions assets/datavzrd/sompy.datavzrd.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ views:
columns:
Tool:
display-mode: normal
Threshold:
Type:
display-mode: normal
TP_base:
display-mode: normal
TP:
display-mode: normal
FN:
display-mode: normal
TP_call:
Expand All @@ -23,5 +25,17 @@ views:
display-mode: normal
Recall:
display-mode: normal
F1:
recall_lower:
display-mode: normal
recall_upper:
display-mode: normal
recall2:
display-mode: normal
precision_lower:
display-mode: normal
precision_upper:
display-mode: normal
fp.region.size:
display-mode: normal
fp.rate:
display-mode: normal
6 changes: 0 additions & 6 deletions assets/samplesheet.csv

This file was deleted.

5 changes: 0 additions & 5 deletions assets/samplesheet_full.csv

This file was deleted.

3 changes: 3 additions & 0 deletions assets/samplesheet_full_small.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,test_vcf,caller,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,dup_to_ins,pctsize,pctseq,pctovl,evaluationmode,subsample
test1,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/SevenBridges_GraphGATKRefine_05052017/HG002-NA24385-50x.union_170414.split.vcf.gz",graph,,,,,,,,,,,,,
test2,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/OsloUniversityHospital_Exome_GATK_jointVC_11242015/HG002-HG003-HG004.jointVC.filter.vcf",gatk,,,,,,,,,,,,,"Sample_Diag-excap51-HG002-EEogPU"
3 changes: 3 additions & 0 deletions assets/samplesheet_full_sv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,test_vcf,caller,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,dup_to_ins,pctsize,pctseq,pctovl,evaluationmode,subsample
test1,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/manta_GIABv0.6/diploidSV_PASS_DUPtoINS.vcf.gz",manta,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
test2,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/delly_GIABv0.6/norm_recall_HG002.merged__filt_DUPtoINS.vcf.gz",delly,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
3 changes: 3 additions & 0 deletions assets/samplesheet_small.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,test_vcf,caller
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek/hg38/HG002.strelka.variants.chr21.vcf.gz,strelka
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek/hg38/HG002.bcftools.chr21.vcf.gz,bcftools
7 changes: 0 additions & 7 deletions assets/samplesheet_somatic.csv

This file was deleted.

3 changes: 3 additions & 0 deletions assets/samplesheet_somatic_indel.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,test_vcf,caller,subsample
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.freebayes.chr21.vcf.gz,freebayes,
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.strelka.somatic_indels.vcf.gz,strelka,"TUMOR"
4 changes: 4 additions & 0 deletions assets/samplesheet_somatic_snv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
id,test_vcf,caller,subsample
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.freebayes.chr21.vcf.gz,freebayes,
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.manta.somatic_sv.vcf.gz,manta,"HCC1395_HCC1395T"
test3,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.strelka.somatic_snvs.vcf.gz,strelka,"TUMOR"
3 changes: 3 additions & 0 deletions assets/samplesheet_somatic_sv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,test_vcf,caller,subsample
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.tiddit_sv_merge.vcf.gz,tiddit,"HCC1395_HCC1395T"
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/sarek_full_test_somatic_v3.4.2/HCC1395T_vs_HCC1395N.manta.somatic_sv.vcf.gz,manta,"HCC1395_HCC1395T"
4 changes: 4 additions & 0 deletions assets/samplesheet_sv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
id,test_vcf,caller,pctsize,pctseq,pctovl,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,evaluationmode
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/manta.HG002.chr21.vcf.gz,manta,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/Ashkenazim_HG002.filtered.sv.chr21.vcf.gz,merged,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts
test3,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/test/HG002_DRAGEN_SV_hg19.chr21.vcf.gz,dragen,0.3,0,0,100000,100000,0.3,0.3,0.3,100000,true,cts
3 changes: 3 additions & 0 deletions assets/samplesheet_sv_hg37.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,test_vcf,caller
test1,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_delly_SV_hg19.chr21.vcf.gz,delly
test2,https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_manta_SV_hg19_genotype.chr21.vcf.gz,manta
11 changes: 1 addition & 10 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,6 @@
"minLength": 1,
"errorMessage": "Variant caller has to be defined. Can also be unknown, undefined or merged"
},
"vartype": {
"type": "string",
"pattern": "^\\S+$",
"description": "Variant type to apply benchmarking",
"meta": ["vartype"],
"minLength": 1,
"errorMessage": "Variant type can be only one of these: small, sv, snv, indel and cnv",
"enum": ["small", "sv", "snv", "indel", "cnv"]
},
"subsample": {
"type": "string",
"pattern": "^\\S+$",
Expand Down Expand Up @@ -165,6 +156,6 @@
"default": null
}
},
"required": ["test_vcf", "caller", "vartype", "id"]
"required": ["test_vcf", "caller", "id"]
}
}
1 change: 1 addition & 0 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ params {
mito_name = "chrM"
macs_gsize = "2.7e9"
blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"

}
'CHM13' {
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa"
Expand Down
Loading

0 comments on commit 0ca66e3

Please sign in to comment.