Skip to content

Commit

Permalink
Merge pull request #76 from nf-core/44-add-aws-full-test
Browse files Browse the repository at this point in the history
44 add aws full test
  • Loading branch information
kubranarci authored Jul 31, 2024
2 parents 9e78f07 + 7f13f6d commit 6ebdb63
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 39 deletions.
12 changes: 4 additions & 8 deletions assets/samplesheet_HG002_hg37_full.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
id,test_vcf,caller,vartype,refdist,chunksize,normshift,normdist,normsizediff,maxdist,typeignore,dup_to_ins,pctsize,pctseq,pctovl,evaluationmode,subsample
delly1,"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_delly_SV_hg19.chr21.vcf.gz",delly,sv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
lumpy1,"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_lumpy_SV_hg19.sorted.vcf.gz",lumpy,sv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
manta1,"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_manta_SV_hg19_genotype.chr21.vcf.gz",manta,sv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
sbg2,"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002-NA24385-50x.union_170414.split.chr21.vcf.gz",sbg_graph,cnv,,,,,,,true,true,0.3,0,0,cts,
lumpy2,"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002_lumpy_SV_hg19.sorted.vcf.gz",lumpy,cnv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
gatk2,"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002.jointVC.filter.chr21.vcf.gz",gatk_joint,small,,,,,,,,,,,,,
sbg3,"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/HG002-NA24385-50x.union_170414.split.chr21.vcf.gz",sbg_graph,small,,,,,,,,,,,,,
test,"https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/test/merged3.vcf",delly,sv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,delly
test1,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/manta_GIABv0.6/diploidSV_PASS_DUPtoINS.vcf.gz",manta,sv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
test2,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/GIAB_Evaluations/BoutrosLab-SV_curation_GIABv0.6/delly_GIABv0.6/norm_recall_HG002.merged__filt_DUPtoINS.vcf.gz",delly,sv,100000,100000,0.3,0.3,0.3,100000,true,true,0.3,0,0,cts,
test4,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/SevenBridges_GraphGATKRefine_05052017/HG002-NA24385-50x.union_170414.split.vcf.gz",graph,small,,,,,,,,,,,,,
test5,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/OsloUniversityHospital_Exome_GATK_jointVC_11242015/HG002-HG003-HG004.jointVC.filter.vcf",gatk,small,,,,,,,,,,,,,"Sample_Diag-excap51-HG002-EEogPU"
8 changes: 4 additions & 4 deletions assets/samplesheet_HG002_hg38_full.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id,test_vcf,caller,vartype,pctsize,pctseq,pctovl,refdist,chunksize,normshift,normdist,normsizediff,maxdist
lumpy1,/Users/w620-admin/Desktop/nf-core/dataset/hg38/GIAB_GRCh38_SVs_06252018/ajtrio.lumpy.svtyper.HG002.md.sorted.recal.vcf.gz,lumpy,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000
manta1,/Users/w620-admin/Desktop/nf-core/dataset/hg38/GIAB_GRCh38_SVs_06252018/manta.HG002.vcf.gz,manta,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000
unknown1,/Users/w620-admin/Desktop/nf-core/dataset/wittyer_test_cases/HG002.sv.with.corr.vcf.gz,unknown,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000

test11,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/BU_GRCh38_SVs_06252018/ajtrio.lumpy.svtyper.HG002.md.sorted.recal.vcf.gz",lumpy,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000
test22,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/BU_GRCh38_SVs_06252018/HG002.gatk.raw.snps.indels.vcf.gz",gatk,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000
test3,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/BU_GRCh38_SVs_06252018/manta.HG002.vcf.gz",manta,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000
test4,"https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/data/AshkenazimTrio/analysis/BU_GRCh38_SVs_06252018/HG002.gatk.raw.snps.indels.vcf.gz",gatk,small,,,,,,,,,
15 changes: 11 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,18 @@ process {
// Settings applied to the BCFTOOLS_VIEW_SUBSAMPLE process.
withName: BCFTOOLS_VIEW_SUBSAMPLE {
// Output prefix: the input's baseName with the first ".vcf" removed (Groovy String minus),
// then ".subsample" appended.
ext.prefix = { vcf.baseName - ".vcf" + ".subsample" }
// Extra command-line arguments: "--output-type z" plus "-s" followed by meta.subsample.
// NOTE(review): presumably meta.subsample is the samplesheet "subsample" column — confirm against the workflow.
ext.args = {"--output-type z -s ${meta.subsample}" }
// Publishing is switched off for this process (enabled: false); a path is still supplied.
publishDir = [
path: { "${params.outdir}/test" },
enabled: false
]
}
// Settings applied to the BCFTOOLS_VIEW_FILTERMISSING process.
withName: BCFTOOLS_VIEW_FILTERMISSING {
// Output prefix: the input's baseName with the first ".vcf" removed (Groovy String minus),
// then ".filtermiss" appended.
ext.prefix = { vcf.baseName - ".vcf" + ".filtermiss" }
// Extra command-line arguments: "--output-type z" plus the exclusion expression -e 'AC=0'.
// NOTE(review): per the bcftools manual, -e drops records matching the expression,
// i.e. sites whose allele count is zero — confirm this is the intended filter.
ext.args = {"--output-type z -e 'AC=0'" }
// Publishing is switched off for this process (enabled: false); a path is still supplied.
publishDir = [
path: { "${params.outdir}/test" },
enabled: false
]
}
withName: BCFTOOLS_VIEW_CONTIGS {
ext.prefix = { vcf.baseName - ".vcf" + ".nocontigs" }
Expand All @@ -75,9 +83,8 @@ process {
(params.genome.contains("38"))? "-r chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY" : "-r 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"
].join(' ').trim() }
publishDir = [
path: {"${params.outdir}/${meta.id}/preprocess"},
pattern: "*{.vcf}",
mode: params.publish_dir_mode
path: { "${params.outdir}/test" },
enabled: false
]
}
withName: BCFTOOLS_VIEW_SNV {
Expand Down Expand Up @@ -296,7 +303,7 @@ process {
// Don't publish results for these processes
//
process {
withName: 'TABIX_TABIX|TABIX_BGZIP|TABIX_BGZIPTABIX|BGZIP_TABIX|SURVIVOR_MERGE|BCFTOOLS_MERGE' {
withName: 'TABIX_TABIX|TABIX_BGZIP|TABIX_BGZIPTABIX|BGZIP_TABIX|SURVIVOR_MERGE|BCFTOOLS_MERGE|REFORMAT_HEADER|BCFTOOLS_NORM|BCFTOOLS_DEDUP|BCFTOOLS_REHEADER' {
publishDir = [
path: { "${params.outdir}/test" },
enabled: false
Expand Down
8 changes: 8 additions & 0 deletions conf/test_hg37.config
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,12 @@ params {
preprocess = "normalization,deduplication,filter_contigs"
min_sv_size = 30
sv_standardization = "homogenize,svync"

truth_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.chr21.vcf.gz"
high_conf_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.bed"
truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.vcf.gz"
high_conf_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.bed"
truth_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.vcf.gz"
high_conf_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.bed"

}
7 changes: 7 additions & 0 deletions conf/test_hg37_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,11 @@ params {
min_sv_size = 30
//include_expression = 'FILTER="PASS"'

truth_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.chr21.vcf.gz"
high_conf_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.bed"
truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.vcf.gz"
high_conf_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.bed"
truth_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.vcf.gz"
high_conf_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.bed"

}
14 changes: 7 additions & 7 deletions conf/test_hg38.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ params {
// Processes
analysis = 'germline' //somatic
sample = "HG002" // available samples: SEQC2, HG002
method = 'truvari,svanalyzer,wittyer' // --not working for now : vcfdist

preprocess = "normalization, deduplication"
method = 'truvari,svanalyzer,rtgtools,happy' //
preprocess = "normalization,deduplication,filter_contigs"
min_sv_size = 30
//variant_filtering = "include" // null, include, exclude
//expression = 'FILTER="PASS"'

//truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.vcf.gz"
//truth_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/CRMG_benchmark_dataset/HG002_GRCh38_CMRG_smallvar_v1.00.vcf.gz"
//high_conf_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/CRMG_benchmark_dataset/HG002_GRCh38_CMRG_smallvar_v1.00.bed"
//truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz"
//high_conf_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed"

//truth_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz"
//high_conf_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed"
}
28 changes: 12 additions & 16 deletions conf/truth.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,28 @@ params {
germline {
'GRCh38' {
'HG002'{
truth_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/CRMG_benchmark_dataset/HG002_GRCh38_CMRG_smallvar_v1.00.vcf.gz"
high_conf_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/CRMG_benchmark_dataset/HG002_GRCh38_CMRG_smallvar_v1.00.bed"
truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz"
high_conf_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed"
truth_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz"
high_conf_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed"
truth_small = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz"
high_conf_small = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed"
truth_sv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/CMRG_v1.00/GRCh38/StructuralVariant/HG002_GRCh38_CMRG_SV_v1.00.vcf.gz"
high_conf_sv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/CMRG_v1.00/GRCh38/StructuralVariant/HG002_GRCh38_CMRG_SV_v1.00.bed"
}
}
'GRCh37'{
'HG002'{
truth_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.chr21.vcf.gz"
high_conf_small = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_GRCh37_1_22_v4.2.1_highconf.bed"
truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.vcf.gz"
high_conf_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.bed"
truth_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.vcf.gz"
high_conf_cnv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg37/truth/HG002_SVs_Tier1_v0.6.chr21.bed"
truth_small = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh37/HG002_GRCh37_1_22_v4.2.1_benchmark.vcf.gz"
high_conf_small = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NISTv4.2.1/GRCh37/HG002_GRCh37_1_22_v4.2.1_benchmark.bed"
truth_sv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/HG002_SVs_Tier1_v0.6.vcf.gz"
high_conf_sv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/AshkenazimTrio/HG002_NA24385_son/NIST_SV_v0.6/HG002_SVs_Tier1_v0.6.bed"
}
}
}
somatic{
'GRCh38'{
'SEQC2'{
truth_snv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/test/hg38/sSNV_truth_set_v1.0.chr21.vcf.gz"
high_conf_snv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/test/hg38/high-confidence_sSNV_in_HC_regions_v1.2.chr21.vcf.gz"
truth_indel = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/test/hg38/sINDEL_truth_set_v1.0.chr21.vcf.gz"
high_conf_indel = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/somatic/test/hg38/high-confidence_sINDEL_in_HC_regions_v1.2.chr21.vcf.gz"
truth_snv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/high-confidence_sSNV_in_HC_regions_v1.2.1.vcf.gz"
high_conf_snv = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/High-Confidence_Regions_v1.2.bed"
truth_indel = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/high-confidence_sINDEL_in_HC_regions_v1.2.1.vcf.gz"
high_conf_indel = "https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/seqc/Somatic_Mutation_WG/release/latest/High-Confidence_Regions_v1.2.bed"
}
}
}
Expand Down

0 comments on commit 6ebdb63

Please sign in to comment.