diff --git a/Snakefile b/Snakefile index fe6da1e..81b6599 100644 --- a/Snakefile +++ b/Snakefile @@ -127,7 +127,6 @@ rule all: f"work/annos/grch38/seqvars/gnomad_mtdna/{DV.gnomad_mtdna}/gnomad_mtdna.vcf.gz", f"work/download/annos/grch38/seqvars/gnomad_exomes/{DV.gnomad_v4}/.done", f"work/download/annos/grch38/seqvars/gnomad_genomes/{DV.gnomad_v4}/.done", - # NB: gnomAD-SV GRCh38 was announced end of 2020 but not released yet # -- genome browser "features" (position-specific) # ---- GRCh37 f"work/annos/grch37/features/cons/{DV.ucsc_cons_37}/ucsc_conservation.tsv", @@ -181,10 +180,6 @@ rule all: # ----- genes f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.hpo}+{DV.today}+{PV.annonars}/rocksdb/IDENTITY", # -- worker data - f"output/full/worker/genes-regions-grch37-{DV.refseq_37}+{PV.worker}/refseq_genes.bin", - f"output/full/worker/genes-regions-grch37-{DV.ensembl_37}+{PV.worker}/ensembl_genes.bin", - f"output/full/worker/genes-regions-grch38-{DV.refseq_38}+{PV.worker}/refseq_genes.bin", - f"output/full/worker/genes-regions-grch38-{DV.ensembl_38}+{PV.worker}/ensembl_genes.bin", f"output/full/worker/genes-xlink-{DV.today}+{PV.worker}/genes-xlink.bin", f"output/full/worker/acmg-sf-{DV.acmg_sf}+{PV.worker}/acmg_sf.tsv", f"output/full/worker/mim2gene-{DV.today}+{PV.worker}/mim2gene.tsv", @@ -200,6 +195,8 @@ rule all: f"output/full/worker/bgdb-dgv-gs-grch38-{DV.dgv}+{PV.worker}/bgdb-dgv-gs.bin", f"output/full/worker/bgdb-gnomad-grch37-{DV.gnomad_sv}+{PV.worker}/bgdb-gnomad.bin", f"output/full/worker/bgdb-exac-grch37-{DV.exac_cnv}+{PV.worker}/bgdb-exac.bin", + f"output/full/worker/bgdb-gnomad-exomes-cnv-grch38-{DV.gnomad_sv4}+{PV.worker}/bgdb-gnomad-exomes-cnv-grch38.bin", + f"output/full/worker/bgdb-gnomad-genomes-sv-grch38-{DV.gnomad_sv4}+{PV.worker}/bgdb-gnomad-genomes-sv-grch38.bin", f"output/full/worker/bgdb-g1k-grch37-{DV.g1k_svs}+{PV.worker}/bgdb-g1k.bin", 
f"output/full/worker/clinvar-strucvars-grch37-{DV.clinvar_version}+{PV.worker}/clinvar-strucvars.bin", f"output/full/worker/clinvar-strucvars-grch38-{DV.clinvar_version}+{PV.worker}/clinvar-strucvars.bin", @@ -227,6 +224,8 @@ rule all: f"output/full/tracks/track-strucvars-exac-grch37-{DV.exac_cnv}+{DV.tracks}/exac.bed.gz", f"output/full/tracks/track-strucvars-g1k-grch37-{DV.g1k_svs}+{DV.tracks}/g1k.bed.gz", f"output/full/tracks/track-strucvars-gnomad-grch37-{DV.gnomad_sv}+{DV.tracks}/gnomad.bed.gz", + f"output/full/tracks/track-strucvars-gnomad-sv-grch38-{DV.gnomad_sv4}+{DV.tracks}/gnomad-sv.bed.gz", + f"output/full/tracks/track-strucvars-gnomad-cnv-grch38-{DV.gnomad_cnv4}+{DV.tracks}/gnomad-cnv.bed.gz", # ----- known pathogenic MMS f"output/full/tracks/track-strucvars-patho-mms-grch37-{DV.patho_mms}+{DV.tracks}/patho-mms.bed", f"output/full/tracks/track-strucvars-patho-mms-grch38-{DV.patho_mms}+{DV.tracks}/patho-mms.bed", @@ -408,7 +407,6 @@ include: "rules/output/annonars/regions.smk" # ---- worker include: "rules/output/worker/patho_mms.smk" include: "rules/output/worker/clinvar.smk" -include: "rules/output/worker/genes_regions.smk" include: "rules/output/worker/hgnc.smk" include: "rules/output/worker/acmg.smk" include: "rules/output/worker/mim2gene.smk" diff --git a/download_urls.yml b/download_urls.yml index 4b3ecea..0f86ab9 100644 --- a/download_urls.yml +++ b/download_urls.yml @@ -105,29 +105,101 @@ strategy: gz-head count: 160 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr1.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr2.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr3.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: 
https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr4.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr5.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr6.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr7.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr8.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr9.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr10.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr11.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr12.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr13.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr14.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr15.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: 
https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr16.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr17.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr18.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr19.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr20.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr21.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr22.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chrX.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chrY.vcf.gz + excerpt_strategy: + strategy: vcf-head + count: 1800 - url: https://www.deciphergenomics.org/files/downloads/HI_Predictions_Version3.bed.gz @@ -157,13 +229,9 @@ url: https://search.clinicalgenome.org/kb/reports/curation-activity-summary-report skip_upstream_check: true # does not work reliably in tests -- url: https://github.com/bihealth/annonars-data-clinvar/releases/download/clinvar-weekly-20230625/clinvar-strucvar-grch37-2023-0625+0.6.3.tar.gz +- url: 
https://github.com/varfish-org/clinvar-data-jsonl/releases/download/clinvar-weekly-20240612/clinvar-data-extract-vars-20240612+0.17.0.tar.gz excerpt_strategy: - strategy: no-excerpt - count: null -- url: https://github.com/bihealth/annonars-data-clinvar/releases/download/clinvar-weekly-20230625/clinvar-strucvar-grch38-2023-0625+0.6.3.tar.gz - excerpt_strategy: - strategy: no-excerpt + strategy: manual count: null - url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.4.4/mehari-data-txs-grch37-0.4.4.bin.zst diff --git a/environment.yml b/environment.yml index 5e552bb..fc74e6d 100644 --- a/environment.yml +++ b/environment.yml @@ -46,7 +46,7 @@ dependencies: - annonars =0.34.0 - viguno =0.2.0 - mehari =0.21.1 - - varfish-server-worker =0.10.2 + - varfish-server-worker =0.12.0 # S3 uploads - s5cmd =2.1.0 # async HTTP requests diff --git a/excerpt-data/05e93e6f1f5d60e6/gnomad.v4.0.sv.chr4.vcf.gz b/excerpt-data/05e93e6f1f5d60e6/gnomad.v4.0.sv.chr4.vcf.gz index fb9a00e..9bfb658 100644 --- a/excerpt-data/05e93e6f1f5d60e6/gnomad.v4.0.sv.chr4.vcf.gz +++ b/excerpt-data/05e93e6f1f5d60e6/gnomad.v4.0.sv.chr4.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74cdc80bc74dd499a52d06dea268987036ea52bdab9d9add9c5e3816a4629f43 -size 420152 +oid sha256:d9be1bb5c174b52283115f136ba6cf572e7a6263949b42027812e1cb1885ccb6 +size 6383282 diff --git a/excerpt-data/05e93e6f1f5d60e6/gnomad.v4.0.sv.chr4.vcf.gz.tbi b/excerpt-data/05e93e6f1f5d60e6/gnomad.v4.0.sv.chr4.vcf.gz.tbi index ad46be9..3ac5653 100644 --- a/excerpt-data/05e93e6f1f5d60e6/gnomad.v4.0.sv.chr4.vcf.gz.tbi +++ b/excerpt-data/05e93e6f1f5d60e6/gnomad.v4.0.sv.chr4.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:952a07ed29b3360170a993dc12a1aa6773820233779762704655446b32baa416 -size 414 +oid sha256:5e9d39a73ebced836310fe542958b69ce6fc3213ab10f81b02f918256f0b07f6 +size 2230 diff --git a/excerpt-data/0be9b2561c9397f2/gnomad.v4.0.sv.chr15.vcf.gz 
b/excerpt-data/0be9b2561c9397f2/gnomad.v4.0.sv.chr15.vcf.gz index 6653e09..559d895 100644 --- a/excerpt-data/0be9b2561c9397f2/gnomad.v4.0.sv.chr15.vcf.gz +++ b/excerpt-data/0be9b2561c9397f2/gnomad.v4.0.sv.chr15.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d891ea2a13db923a679f0f38578a4ed30518bdb9ea41b21585e9250d8d5c19e7 -size 489677 +oid sha256:af85db78f15ed4f5634a79962d798a287738f26d8d82a669a3aea39a0c839420 +size 6996598 diff --git a/excerpt-data/0be9b2561c9397f2/gnomad.v4.0.sv.chr15.vcf.gz.tbi b/excerpt-data/0be9b2561c9397f2/gnomad.v4.0.sv.chr15.vcf.gz.tbi index ae0fcdb..00507e9 100644 --- a/excerpt-data/0be9b2561c9397f2/gnomad.v4.0.sv.chr15.vcf.gz.tbi +++ b/excerpt-data/0be9b2561c9397f2/gnomad.v4.0.sv.chr15.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ccc9925e01347b6290c9a7d57bb3d1ece3a68eac2c44cf840dcddf84c0b2307 -size 296 +oid sha256:25f5813ebdb04052370608b8a6baf2af3cd50d70249b6c78d0062217a23e2345 +size 1414 diff --git a/excerpt-data/13d4b1406e769b80/gnomad.v4.0.sv.chr3.vcf.gz b/excerpt-data/13d4b1406e769b80/gnomad.v4.0.sv.chr3.vcf.gz index aac91a5..ac3e23b 100644 --- a/excerpt-data/13d4b1406e769b80/gnomad.v4.0.sv.chr3.vcf.gz +++ b/excerpt-data/13d4b1406e769b80/gnomad.v4.0.sv.chr3.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cae509c9c8a04c8be9606de238b74e158873bc1e0a22480701a0dafa40df2849 -size 454927 +oid sha256:9cee20411339f3b261589ccbda706a2c5d66d0e7c7dc1e6689374edd2ff06777 +size 6409154 diff --git a/excerpt-data/13d4b1406e769b80/gnomad.v4.0.sv.chr3.vcf.gz.tbi b/excerpt-data/13d4b1406e769b80/gnomad.v4.0.sv.chr3.vcf.gz.tbi index cad8f87..d30c9bb 100644 --- a/excerpt-data/13d4b1406e769b80/gnomad.v4.0.sv.chr3.vcf.gz.tbi +++ b/excerpt-data/13d4b1406e769b80/gnomad.v4.0.sv.chr3.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb6fa1c42693ed4db554b389c579c70f1f29694d52db290d2718c5246068e5ef -size 170 +oid 
sha256:297b5eb2e904fe06e06c8bb42847096a24270cb8b5a151cf06aa09f8b57696f1 +size 2414 diff --git a/excerpt-data/182806147755e799/gnomad.v4.0.sv.chr5.vcf.gz b/excerpt-data/182806147755e799/gnomad.v4.0.sv.chr5.vcf.gz index d4f3d44..76ce903 100644 --- a/excerpt-data/182806147755e799/gnomad.v4.0.sv.chr5.vcf.gz +++ b/excerpt-data/182806147755e799/gnomad.v4.0.sv.chr5.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43e5e11083b13f47017110cae922117c41ed43f9c8013e607bede09420f2e879 -size 416702 +oid sha256:1740adcb2bc85aaed72d88e3b95486d62cd70637db51b58d183661c2624df7c6 +size 6689279 diff --git a/excerpt-data/182806147755e799/gnomad.v4.0.sv.chr5.vcf.gz.tbi b/excerpt-data/182806147755e799/gnomad.v4.0.sv.chr5.vcf.gz.tbi index f1f4843..ae8a422 100644 --- a/excerpt-data/182806147755e799/gnomad.v4.0.sv.chr5.vcf.gz.tbi +++ b/excerpt-data/182806147755e799/gnomad.v4.0.sv.chr5.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6080f8b3782bf82e3a8d94865db9e4c6f5a7a89fc5009fedcdbe307a20697dd4 -size 204 +oid sha256:d84c4b05293a766d51f78ecbf3a367c8901812eaf07ab15ca0f233d761d3d8c1 +size 2055 diff --git a/excerpt-data/19a12d845df10514/gnomad.v4.0.sv.chr18.vcf.gz b/excerpt-data/19a12d845df10514/gnomad.v4.0.sv.chr18.vcf.gz index 6ec4dbd..bf2caf5 100644 --- a/excerpt-data/19a12d845df10514/gnomad.v4.0.sv.chr18.vcf.gz +++ b/excerpt-data/19a12d845df10514/gnomad.v4.0.sv.chr18.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34fa9e73c212aec43d742bef36eeae3d50b4997a17c5843e0ebf817c246bf6fc -size 381786 +oid sha256:eecde40cf35a148e54dd0eddfb9383c7d9e13b5f3291a45f5d2f725b455d2ff6 +size 6185812 diff --git a/excerpt-data/19a12d845df10514/gnomad.v4.0.sv.chr18.vcf.gz.tbi b/excerpt-data/19a12d845df10514/gnomad.v4.0.sv.chr18.vcf.gz.tbi index 7ba3477..4c0cc56 100644 --- a/excerpt-data/19a12d845df10514/gnomad.v4.0.sv.chr18.vcf.gz.tbi +++ b/excerpt-data/19a12d845df10514/gnomad.v4.0.sv.chr18.vcf.gz.tbi @@ -1,3 +1,3 @@ 
version https://git-lfs.github.com/spec/v1 -oid sha256:dc0fbd858f71ab97f0aad603982beeabb0edc6e830da0babfd8873d5e12a023a -size 212 +oid sha256:ba062883850887fb231bcc2a45a5c2b1ce3704530e5eb09f99e44945e16de808 +size 1721 diff --git a/excerpt-data/2245e9d640b3f206/gnomad.v4.0.sv.chrX.vcf.gz b/excerpt-data/2245e9d640b3f206/gnomad.v4.0.sv.chrX.vcf.gz index 4464d4b..0a3719f 100644 --- a/excerpt-data/2245e9d640b3f206/gnomad.v4.0.sv.chrX.vcf.gz +++ b/excerpt-data/2245e9d640b3f206/gnomad.v4.0.sv.chrX.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3127df711b9c38a08dc317be240704e3ef20b7b86e8e739677429823aa37fd4d -size 398015 +oid sha256:3d4f560a4f3912f615f99328ebc200c12d6bae4e60fdbb7718f80bf6644e5182 +size 6690067 diff --git a/excerpt-data/2245e9d640b3f206/gnomad.v4.0.sv.chrX.vcf.gz.tbi b/excerpt-data/2245e9d640b3f206/gnomad.v4.0.sv.chrX.vcf.gz.tbi index 3ba8472..18c5ac2 100644 --- a/excerpt-data/2245e9d640b3f206/gnomad.v4.0.sv.chrX.vcf.gz.tbi +++ b/excerpt-data/2245e9d640b3f206/gnomad.v4.0.sv.chrX.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7073665b518fc929a8049a5b9101103ec7628f844d04ff8943ff98653ff91d59 -size 317 +oid sha256:812ed56d6df2c2fa20d79f8f1d1e6456f2e81cdff12b94c8320994244311f4f3 +size 3151 diff --git a/excerpt-data/2c4af2ee68c51be6/clinvar-strucvar-grch38-2023-0625+0.6.3.tar.gz b/excerpt-data/2c4af2ee68c51be6/clinvar-strucvar-grch38-2023-0625+0.6.3.tar.gz deleted file mode 100644 index f57bdf0..0000000 --- a/excerpt-data/2c4af2ee68c51be6/clinvar-strucvar-grch38-2023-0625+0.6.3.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:93654972056197cfa595d79d33469a68489e70292fe55efe66f2346b9a721dd3 -size 9853972 diff --git a/excerpt-data/2c4af2ee68c51be6/url.txt b/excerpt-data/2c4af2ee68c51be6/url.txt deleted file mode 100644 index 71f585c..0000000 --- a/excerpt-data/2c4af2ee68c51be6/url.txt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 
-oid sha256:3fbbb56c28633c107e6bc208ead69ba07444a2022f5ebce2a8b4e961482da2b9 -size 139 diff --git a/excerpt-data/3390d9ca9ce97bac/gnomad.v4.0.sv.chr2.vcf.gz b/excerpt-data/3390d9ca9ce97bac/gnomad.v4.0.sv.chr2.vcf.gz index c72d13f..f1f0f38 100644 --- a/excerpt-data/3390d9ca9ce97bac/gnomad.v4.0.sv.chr2.vcf.gz +++ b/excerpt-data/3390d9ca9ce97bac/gnomad.v4.0.sv.chr2.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d20cec346e12949e6eb0cb6b197063ce5d113b4a2d5e9073bc843274bec95aa5 -size 424653 +oid sha256:3043cc98b3394310d394c27532ae6648d099b7c764f14205b4b0d1aa9412a8af +size 6515761 diff --git a/excerpt-data/3390d9ca9ce97bac/gnomad.v4.0.sv.chr2.vcf.gz.tbi b/excerpt-data/3390d9ca9ce97bac/gnomad.v4.0.sv.chr2.vcf.gz.tbi index 9c9387e..37ab703 100644 --- a/excerpt-data/3390d9ca9ce97bac/gnomad.v4.0.sv.chr2.vcf.gz.tbi +++ b/excerpt-data/3390d9ca9ce97bac/gnomad.v4.0.sv.chr2.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05175769d37521c3f7d755fe3adc6f575cec7b2e4799fb6806dca7f1a761fa66 -size 135 +oid sha256:24e59de26106a7998dfc01bdadd3add55d8021762d7beab80f57411f67c18483 +size 2458 diff --git a/excerpt-data/3b19a5df654e6b4f/gnomad.v4.0.sv.chrY.vcf.gz b/excerpt-data/3b19a5df654e6b4f/gnomad.v4.0.sv.chrY.vcf.gz index 219880c..afb922a 100644 --- a/excerpt-data/3b19a5df654e6b4f/gnomad.v4.0.sv.chrY.vcf.gz +++ b/excerpt-data/3b19a5df654e6b4f/gnomad.v4.0.sv.chrY.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b7ff9dc388be77a7f4ff603eec87d678d8a4ecd5913396988703e458cda553c -size 430809 +oid sha256:2bb891b19a29d9b8668a2ef8af5fecb195419abda07cddb41783af1fa694590b +size 7560748 diff --git a/excerpt-data/3b19a5df654e6b4f/gnomad.v4.0.sv.chrY.vcf.gz.tbi b/excerpt-data/3b19a5df654e6b4f/gnomad.v4.0.sv.chrY.vcf.gz.tbi index 1d703af..6563d30 100644 --- a/excerpt-data/3b19a5df654e6b4f/gnomad.v4.0.sv.chrY.vcf.gz.tbi +++ b/excerpt-data/3b19a5df654e6b4f/gnomad.v4.0.sv.chrY.vcf.gz.tbi @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:a0e310746236a220fb0ba3d1dcbdeebc16499138d45f1ae25988d69371766ec5 -size 293 +oid sha256:bff46ca5ea77975b22eacc32111bb5b05d7585697a29799363ad6b326ede075b +size 1493 diff --git a/excerpt-data/41e368687b2c69e4/gnomad.v4.0.sv.chr7.vcf.gz b/excerpt-data/41e368687b2c69e4/gnomad.v4.0.sv.chr7.vcf.gz index 5c443d7..7cb1d66 100644 --- a/excerpt-data/41e368687b2c69e4/gnomad.v4.0.sv.chr7.vcf.gz +++ b/excerpt-data/41e368687b2c69e4/gnomad.v4.0.sv.chr7.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:833c7e97efa6ecc0c2038c6bd9e07c63b1c26025e9efb39b599780c3ff00c2a0 -size 412586 +oid sha256:2b5ae353018acda796ebc4f1fc889591e9799c5a3f19bf040e678e81538d5eb5 +size 6679506 diff --git a/excerpt-data/41e368687b2c69e4/gnomad.v4.0.sv.chr7.vcf.gz.tbi b/excerpt-data/41e368687b2c69e4/gnomad.v4.0.sv.chr7.vcf.gz.tbi index 0cc454e..121ce17 100644 --- a/excerpt-data/41e368687b2c69e4/gnomad.v4.0.sv.chr7.vcf.gz.tbi +++ b/excerpt-data/41e368687b2c69e4/gnomad.v4.0.sv.chr7.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0517a6e670aeba67b7f449c7e55c52cb82560946996cf465c66034abb7e71fb7 -size 283 +oid sha256:8d58c755d46f4d1dab27498726eca16f3c8b5ad1dcd69a61511ee451cd24b04a +size 2122 diff --git a/excerpt-data/555fd2fa82d1b73d/gnomad.v4.0.sv.chr11.vcf.gz b/excerpt-data/555fd2fa82d1b73d/gnomad.v4.0.sv.chr11.vcf.gz index 54cbe3b..ae114ad 100644 --- a/excerpt-data/555fd2fa82d1b73d/gnomad.v4.0.sv.chr11.vcf.gz +++ b/excerpt-data/555fd2fa82d1b73d/gnomad.v4.0.sv.chr11.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21a5dd8df2ba95bd07b6c237b3c11604e649b121973fd84e38565e0cb70dbcf0 -size 424520 +oid sha256:d70ca0e501fa10befe3833a952774009754a344c500303e2e96391b4b8bae3f1 +size 6384054 diff --git a/excerpt-data/555fd2fa82d1b73d/gnomad.v4.0.sv.chr11.vcf.gz.tbi b/excerpt-data/555fd2fa82d1b73d/gnomad.v4.0.sv.chr11.vcf.gz.tbi index f389d98..0d13933 100644 --- 
a/excerpt-data/555fd2fa82d1b73d/gnomad.v4.0.sv.chr11.vcf.gz.tbi +++ b/excerpt-data/555fd2fa82d1b73d/gnomad.v4.0.sv.chr11.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43ef41f6c4cc59b8b188fa3822f3863661f840af87326aac623ab6155541161e -size 300 +oid sha256:b19d611a4d0ca021092470582223c575dd7b53b4f636e50abac9db7928aebb1e +size 2209 diff --git a/excerpt-data/6433022d4690f461/gnomad.v4.0.sv.chr21.vcf.gz b/excerpt-data/6433022d4690f461/gnomad.v4.0.sv.chr21.vcf.gz index 637fcc5..68d13d4 100644 --- a/excerpt-data/6433022d4690f461/gnomad.v4.0.sv.chr21.vcf.gz +++ b/excerpt-data/6433022d4690f461/gnomad.v4.0.sv.chr21.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1aea4d0f2dac8fe5c0101bb8d4b0284f03720a014776ecb448e6ab0e0d2c51d -size 403589 +oid sha256:ba9e29d1bc7bb590619430955a4cecfe757805bb7a4eadeb527c22325e542f71 +size 6805748 diff --git a/excerpt-data/6433022d4690f461/gnomad.v4.0.sv.chr21.vcf.gz.tbi b/excerpt-data/6433022d4690f461/gnomad.v4.0.sv.chr21.vcf.gz.tbi index cd03395..526029a 100644 --- a/excerpt-data/6433022d4690f461/gnomad.v4.0.sv.chr21.vcf.gz.tbi +++ b/excerpt-data/6433022d4690f461/gnomad.v4.0.sv.chr21.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b9b5e428fdbe544a37f0db1f63ebf7322e5847210be9e2f0995d44753020878 -size 274 +oid sha256:acb011b651a71b9f7d90271c0151d353a665d9ba8ba11c7b770a32c4d307f904 +size 891 diff --git a/excerpt-data/6f337fba39cc4c2e/gnomad.v4.0.sv.chr13.vcf.gz b/excerpt-data/6f337fba39cc4c2e/gnomad.v4.0.sv.chr13.vcf.gz index e7fbdcc..888c096 100644 --- a/excerpt-data/6f337fba39cc4c2e/gnomad.v4.0.sv.chr13.vcf.gz +++ b/excerpt-data/6f337fba39cc4c2e/gnomad.v4.0.sv.chr13.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae4baafd36a434434e3dd8a9e66a1a25387e4ca1c486e7b2d17f0ed6301187a5 -size 513889 +oid sha256:31dccd380e27ebf5112bea61423bb18cc0f99e824721759e8dcdf77f515f7cff +size 6281142 diff --git 
a/excerpt-data/6f337fba39cc4c2e/gnomad.v4.0.sv.chr13.vcf.gz.tbi b/excerpt-data/6f337fba39cc4c2e/gnomad.v4.0.sv.chr13.vcf.gz.tbi index b44c5ac..7cb1797 100644 --- a/excerpt-data/6f337fba39cc4c2e/gnomad.v4.0.sv.chr13.vcf.gz.tbi +++ b/excerpt-data/6f337fba39cc4c2e/gnomad.v4.0.sv.chr13.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9c1e4e65c1abd5f78ef55075c3d81e94bb5d074310adfabff3f4ddd42d79b40 -size 297 +oid sha256:2551bc08b58e9240b407054ea009f24a94d6217c38f2bb6df597140d91ff460d +size 2077 diff --git a/excerpt-data/9cc0f2ee73775510/gnomad.v4.0.sv.chr10.vcf.gz b/excerpt-data/9cc0f2ee73775510/gnomad.v4.0.sv.chr10.vcf.gz index 6969b91..fcf9a02 100644 --- a/excerpt-data/9cc0f2ee73775510/gnomad.v4.0.sv.chr10.vcf.gz +++ b/excerpt-data/9cc0f2ee73775510/gnomad.v4.0.sv.chr10.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32b8f2049412947a837c1dc5d04a55dca2cbf31cd22e899b6a297a4752c5fdfa -size 390567 +oid sha256:f88d409f87850e1ecbdccb7f2aa650df4be9789081a2b87c579f70b48f46671d +size 6476600 diff --git a/excerpt-data/9cc0f2ee73775510/gnomad.v4.0.sv.chr10.vcf.gz.tbi b/excerpt-data/9cc0f2ee73775510/gnomad.v4.0.sv.chr10.vcf.gz.tbi index 8011123..cc9f257 100644 --- a/excerpt-data/9cc0f2ee73775510/gnomad.v4.0.sv.chr10.vcf.gz.tbi +++ b/excerpt-data/9cc0f2ee73775510/gnomad.v4.0.sv.chr10.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86992343986e8d296dba0980b53dc3622f24d1c8521b08385e47c5aece22a75a -size 213 +oid sha256:26d10a3c0c54fe021a4ab559ec28bf09357eab002c76ffc23bc8f29e0a86bb0b +size 1518 diff --git a/excerpt-data/a271968c7bf845c0/gnomad.v4.0.sv.chr16.vcf.gz b/excerpt-data/a271968c7bf845c0/gnomad.v4.0.sv.chr16.vcf.gz index 322d0da..26d1903 100644 --- a/excerpt-data/a271968c7bf845c0/gnomad.v4.0.sv.chr16.vcf.gz +++ b/excerpt-data/a271968c7bf845c0/gnomad.v4.0.sv.chr16.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:68b2163951986ea1206924defa2e9a22ca31f7a5bb465e9b1a29fbbb584ec047 -size 388996 +oid sha256:9c7829fdb0d4749d7324b6f6aaa70ad788ef2c68ac13788674ba3baf40beb70f +size 6376812 diff --git a/excerpt-data/a271968c7bf845c0/gnomad.v4.0.sv.chr16.vcf.gz.tbi b/excerpt-data/a271968c7bf845c0/gnomad.v4.0.sv.chr16.vcf.gz.tbi index 8d58c7f..755821a 100644 --- a/excerpt-data/a271968c7bf845c0/gnomad.v4.0.sv.chr16.vcf.gz.tbi +++ b/excerpt-data/a271968c7bf845c0/gnomad.v4.0.sv.chr16.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a5db430f4f13d480b945a2c2c737d6f64b1f44d3910b66e13a9077b7a608d38 -size 289 +oid sha256:cad6447bb6c93bbb49c255967609c2151c407adabb7b6b82afad3c58b9b878c8 +size 2117 diff --git a/excerpt-data/a83da02295a7ae02/gnomad.v4.0.sv.chr20.vcf.gz b/excerpt-data/a83da02295a7ae02/gnomad.v4.0.sv.chr20.vcf.gz index e9079b7..50b4454 100644 --- a/excerpt-data/a83da02295a7ae02/gnomad.v4.0.sv.chr20.vcf.gz +++ b/excerpt-data/a83da02295a7ae02/gnomad.v4.0.sv.chr20.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45a1c85a7356d6221410ad7aefeeef836403758cfe2bceebc0cee4c0987b3554 -size 376158 +oid sha256:4c5aacd2e859bdf598d4f19ddc7fb0e3f2de27f94a3ce02614b7f3079e5bbd47 +size 6051918 diff --git a/excerpt-data/a83da02295a7ae02/gnomad.v4.0.sv.chr20.vcf.gz.tbi b/excerpt-data/a83da02295a7ae02/gnomad.v4.0.sv.chr20.vcf.gz.tbi index ad5e235..f68838d 100644 --- a/excerpt-data/a83da02295a7ae02/gnomad.v4.0.sv.chr20.vcf.gz.tbi +++ b/excerpt-data/a83da02295a7ae02/gnomad.v4.0.sv.chr20.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19aeacefa75b206ec58916e13cc2f84ea7c2c870db0fa9544d85e7d5c224fd29 -size 239 +oid sha256:46d5418c8ac4dec8a93c9f593a480384ba960ab76272b7b3fb85d4cb940117e6 +size 1340 diff --git a/excerpt-data/aea53328ecd0b712/gnomad.v4.0.sv.chr6.vcf.gz b/excerpt-data/aea53328ecd0b712/gnomad.v4.0.sv.chr6.vcf.gz index f3ddf41..9e8c534 100644 --- 
a/excerpt-data/aea53328ecd0b712/gnomad.v4.0.sv.chr6.vcf.gz +++ b/excerpt-data/aea53328ecd0b712/gnomad.v4.0.sv.chr6.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11e3601dec127e533cc7902490cb4c0c29a002c85e6bf157200409fbdc392597 -size 396393 +oid sha256:254a1732d4025d411f23d12031145ea0eb7f50c1ffe4d3a050ce17fd04ef7e1c +size 6321882 diff --git a/excerpt-data/aea53328ecd0b712/gnomad.v4.0.sv.chr6.vcf.gz.tbi b/excerpt-data/aea53328ecd0b712/gnomad.v4.0.sv.chr6.vcf.gz.tbi index a62c841..f708adb 100644 --- a/excerpt-data/aea53328ecd0b712/gnomad.v4.0.sv.chr6.vcf.gz.tbi +++ b/excerpt-data/aea53328ecd0b712/gnomad.v4.0.sv.chr6.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51b989744861351f01c9a9ee9074e93d257925bf5552b94fdb00110e4c82c8e7 -size 270 +oid sha256:40d451f2f871cc42b32417b5359bb0835b08f26aa81769a3597b00c77da91c26 +size 1750 diff --git a/excerpt-data/bc6f1860df305e83/gnomad.v4.0.sv.chr17.vcf.gz b/excerpt-data/bc6f1860df305e83/gnomad.v4.0.sv.chr17.vcf.gz index 0526ba5..23109a6 100644 --- a/excerpt-data/bc6f1860df305e83/gnomad.v4.0.sv.chr17.vcf.gz +++ b/excerpt-data/bc6f1860df305e83/gnomad.v4.0.sv.chr17.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd6c9d5dd5475c57b5da8bc2d60ad97ba08769796cfba18ceaf31e12bf97700f -size 389157 +oid sha256:3f5c13e0ede194a02fe2fe4469dd44e27358bab5e05c17ba1f44243e1401dda2 +size 6531028 diff --git a/excerpt-data/bc6f1860df305e83/gnomad.v4.0.sv.chr17.vcf.gz.tbi b/excerpt-data/bc6f1860df305e83/gnomad.v4.0.sv.chr17.vcf.gz.tbi index c05e188..1779306 100644 --- a/excerpt-data/bc6f1860df305e83/gnomad.v4.0.sv.chr17.vcf.gz.tbi +++ b/excerpt-data/bc6f1860df305e83/gnomad.v4.0.sv.chr17.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8543baabca5fd4e7b31bf104ca3933e033feeaa80935df3308664018c4255596 -size 307 +oid sha256:4c9205528533d316e0fba07e609d7924bc11c3318d79c7edc9be21e3f103228a +size 1902 diff --git 
a/excerpt-data/bdc69c1e4cafdfaa/clinvar-data-extract-vars-20240612+0.17.0.tar.gz b/excerpt-data/bdc69c1e4cafdfaa/clinvar-data-extract-vars-20240612+0.17.0.tar.gz new file mode 100644 index 0000000..9a316a0 --- /dev/null +++ b/excerpt-data/bdc69c1e4cafdfaa/clinvar-data-extract-vars-20240612+0.17.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a17a234875bb8ea38a7c46b396a24aa72fec1289705053628542b2ecf93d64d5 +size 5032347 diff --git a/excerpt-data/bdc69c1e4cafdfaa/url.txt b/excerpt-data/bdc69c1e4cafdfaa/url.txt new file mode 100644 index 0000000..ef5318b --- /dev/null +++ b/excerpt-data/bdc69c1e4cafdfaa/url.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da8940a1bee1f7dc5b79275b744ec1344cf4a393d07c7dda5fe4ac563ab9618f +size 141 diff --git a/excerpt-data/c3ce41a42c6319e0/gnomad.v4.0.sv.chr22.vcf.gz b/excerpt-data/c3ce41a42c6319e0/gnomad.v4.0.sv.chr22.vcf.gz index bafb9c2..1292ea8 100644 --- a/excerpt-data/c3ce41a42c6319e0/gnomad.v4.0.sv.chr22.vcf.gz +++ b/excerpt-data/c3ce41a42c6319e0/gnomad.v4.0.sv.chr22.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f06299de29069f7bb1354720250079f0bd691376ad9274f1ecd42e02b33a9d26 -size 407737 +oid sha256:125323ebcb71d8e983901e49dcf2237438510a11c1280474c7e48f23e943e911 +size 6986223 diff --git a/excerpt-data/c3ce41a42c6319e0/gnomad.v4.0.sv.chr22.vcf.gz.tbi b/excerpt-data/c3ce41a42c6319e0/gnomad.v4.0.sv.chr22.vcf.gz.tbi index b86c2e1..0751bf1 100644 --- a/excerpt-data/c3ce41a42c6319e0/gnomad.v4.0.sv.chr22.vcf.gz.tbi +++ b/excerpt-data/c3ce41a42c6319e0/gnomad.v4.0.sv.chr22.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c89d49b0895ac0624702ecb5cf3ad9aaf3583c553dda1461ec7db5f35b9d9d24 -size 247 +oid sha256:f064ca672987105191b3608e4a03d1b19713fb2a4aedc0ae7369e254a76ad0f5 +size 838 diff --git a/excerpt-data/ca218c36ad8374d2/gnomad.v4.0.sv.chr14.vcf.gz b/excerpt-data/ca218c36ad8374d2/gnomad.v4.0.sv.chr14.vcf.gz index 
2b4f09a..e3829d4 100644 --- a/excerpt-data/ca218c36ad8374d2/gnomad.v4.0.sv.chr14.vcf.gz +++ b/excerpt-data/ca218c36ad8374d2/gnomad.v4.0.sv.chr14.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9680b2acae61e70b665a4b4ce01aa80450fa40b349c73ea86220857242bc69c9 -size 390266 +oid sha256:b50f218213ccedabf2cc606bf79dfdd8ab47454540f6d7f9e6129a8060395985 +size 6319944 diff --git a/excerpt-data/ca218c36ad8374d2/gnomad.v4.0.sv.chr14.vcf.gz.tbi b/excerpt-data/ca218c36ad8374d2/gnomad.v4.0.sv.chr14.vcf.gz.tbi index a6a7b00..b9b01d1 100644 --- a/excerpt-data/ca218c36ad8374d2/gnomad.v4.0.sv.chr14.vcf.gz.tbi +++ b/excerpt-data/ca218c36ad8374d2/gnomad.v4.0.sv.chr14.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ed6a3bf8df7d6bbe8a9ec03e872a27fe3dfb83c45334afa9de77979a35d7503 -size 274 +oid sha256:0e68ef4193b1e7d14739aca7229a3c9c149125112a624a34f981fb38572bb597 +size 1462 diff --git a/excerpt-data/d344a91c116abfac/clinvar-strucvar-grch37-2023-0625+0.6.3.tar.gz b/excerpt-data/d344a91c116abfac/clinvar-strucvar-grch37-2023-0625+0.6.3.tar.gz deleted file mode 100644 index 6f288ed..0000000 --- a/excerpt-data/d344a91c116abfac/clinvar-strucvar-grch37-2023-0625+0.6.3.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd9b82f62a8bf5087865936c880713b130f4bd89e4f83b57d009eba5c56daa6a -size 25515630 diff --git a/excerpt-data/d344a91c116abfac/url.txt b/excerpt-data/d344a91c116abfac/url.txt deleted file mode 100644 index 1d750d3..0000000 --- a/excerpt-data/d344a91c116abfac/url.txt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4b6a1a6de7f1e79b86e58b85beecab109697db708ae77e975155a9ff7ea1591d -size 139 diff --git a/excerpt-data/ec5c8ad8a9c29dd0/gnomad.v4.0.sv.chr12.vcf.gz b/excerpt-data/ec5c8ad8a9c29dd0/gnomad.v4.0.sv.chr12.vcf.gz index 4d7d200..48ac1a7 100644 --- a/excerpt-data/ec5c8ad8a9c29dd0/gnomad.v4.0.sv.chr12.vcf.gz +++ 
b/excerpt-data/ec5c8ad8a9c29dd0/gnomad.v4.0.sv.chr12.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c83d34bfb14dabcb882ead05e1f4d93d21570d7323a924576577723d33f83368 -size 394149 +oid sha256:02ea5c3a8ac394a7eb2251d07c85cc91289d57d6e4f55f52eaa0dde042eddafb +size 6310150 diff --git a/excerpt-data/ec5c8ad8a9c29dd0/gnomad.v4.0.sv.chr12.vcf.gz.tbi b/excerpt-data/ec5c8ad8a9c29dd0/gnomad.v4.0.sv.chr12.vcf.gz.tbi index 8978e7c..d316b5d 100644 --- a/excerpt-data/ec5c8ad8a9c29dd0/gnomad.v4.0.sv.chr12.vcf.gz.tbi +++ b/excerpt-data/ec5c8ad8a9c29dd0/gnomad.v4.0.sv.chr12.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1f45cc748138b3fe02e88d974183023786371e1bbc3a0c21516790fd1436405 -size 197 +oid sha256:283823c821d7633cf1776edc855c5ff1e57bff413b502787d37b984dd9627ed5 +size 2188 diff --git a/excerpt-data/eed573336b6205b5/gnomad.v4.0.sv.chr19.vcf.gz b/excerpt-data/eed573336b6205b5/gnomad.v4.0.sv.chr19.vcf.gz index dcd5648..a2735d9 100644 --- a/excerpt-data/eed573336b6205b5/gnomad.v4.0.sv.chr19.vcf.gz +++ b/excerpt-data/eed573336b6205b5/gnomad.v4.0.sv.chr19.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:278c849bcd5536c270761a8c9546c1669074bbc3a07630445096de163cf90155 -size 407544 +oid sha256:c8efc11c0de0794bcc80e1b2e4c7e8e703aefc83681add471cc668014cfa9f6c +size 6508922 diff --git a/excerpt-data/eed573336b6205b5/gnomad.v4.0.sv.chr19.vcf.gz.tbi b/excerpt-data/eed573336b6205b5/gnomad.v4.0.sv.chr19.vcf.gz.tbi index e510d68..36d0876 100644 --- a/excerpt-data/eed573336b6205b5/gnomad.v4.0.sv.chr19.vcf.gz.tbi +++ b/excerpt-data/eed573336b6205b5/gnomad.v4.0.sv.chr19.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d320c44686ec998acba35fd29dc265738b551b94dff4f12039a46229841e6d15 -size 212 +oid sha256:a668e38fb913b6ade9c9c1a88fbb75ed253881886af25414c30480d0d1eb72ac +size 2026 diff --git a/excerpt-data/ef1e660e8e0359bc/gnomad.v4.0.sv.chr1.vcf.gz 
b/excerpt-data/ef1e660e8e0359bc/gnomad.v4.0.sv.chr1.vcf.gz index e4a5ee8..881cc90 100644 --- a/excerpt-data/ef1e660e8e0359bc/gnomad.v4.0.sv.chr1.vcf.gz +++ b/excerpt-data/ef1e660e8e0359bc/gnomad.v4.0.sv.chr1.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94d8ed19180abf8922f5b863ae3ce8bffd9e186aebadd8e81bb3c88eaeef5709 -size 430679 +oid sha256:623597e1759dd8b2e4d72dfb7f7980e9b04b74cc25d6fd921a6295b5028b2b5b +size 6661068 diff --git a/excerpt-data/ef1e660e8e0359bc/gnomad.v4.0.sv.chr1.vcf.gz.tbi b/excerpt-data/ef1e660e8e0359bc/gnomad.v4.0.sv.chr1.vcf.gz.tbi index 48ced38..cca55c5 100644 --- a/excerpt-data/ef1e660e8e0359bc/gnomad.v4.0.sv.chr1.vcf.gz.tbi +++ b/excerpt-data/ef1e660e8e0359bc/gnomad.v4.0.sv.chr1.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb12c4f84202a4839fed7c6f581f8ee44a14aedfea5d4368cf7eb0d34d9d2b5c -size 206 +oid sha256:3e40a0014c5b48ef72a5c01381793a873aec59a9eee53ab0cf310faff4f48e98 +size 1825 diff --git a/excerpt-data/f5375ffc6d0e6adb/gnomad.v4.0.sv.chr9.vcf.gz b/excerpt-data/f5375ffc6d0e6adb/gnomad.v4.0.sv.chr9.vcf.gz index c901c95..eade6bd 100644 --- a/excerpt-data/f5375ffc6d0e6adb/gnomad.v4.0.sv.chr9.vcf.gz +++ b/excerpt-data/f5375ffc6d0e6adb/gnomad.v4.0.sv.chr9.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6c16bcd58417d4711d91dc190ea44c667ebc07f59c71af3418b94ba22a68263 -size 426303 +oid sha256:3ddcdab7fad2abfd8fc13947766a56db1a9fc8d16e56afbe2571400539d31f94 +size 6229224 diff --git a/excerpt-data/f5375ffc6d0e6adb/gnomad.v4.0.sv.chr9.vcf.gz.tbi b/excerpt-data/f5375ffc6d0e6adb/gnomad.v4.0.sv.chr9.vcf.gz.tbi index b2b0386..72d64df 100644 --- a/excerpt-data/f5375ffc6d0e6adb/gnomad.v4.0.sv.chr9.vcf.gz.tbi +++ b/excerpt-data/f5375ffc6d0e6adb/gnomad.v4.0.sv.chr9.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a53e0f00761d5178b0efa713bb8b761a0032089d99c055b0da7a2331c4d5bbc5 -size 234 +oid 
sha256:771b7765c9aaf23b0b36026a5a2f5b6272e34ce301d430c32d2e6230c7c57479 +size 2624 diff --git a/excerpt-data/fe2a55106424e727/gnomad.v4.0.sv.chr8.vcf.gz b/excerpt-data/fe2a55106424e727/gnomad.v4.0.sv.chr8.vcf.gz index 91548ba..94dbf3d 100644 --- a/excerpt-data/fe2a55106424e727/gnomad.v4.0.sv.chr8.vcf.gz +++ b/excerpt-data/fe2a55106424e727/gnomad.v4.0.sv.chr8.vcf.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d59084887e82047051f15439598bdad4aa2d00052e01a01a0b8ed3caff80a38b -size 408984 +oid sha256:f5eff7946221bb0a00e6341e8c5da2c0afef2a9dd7f7c9afdb4be25683c19e76 +size 6524856 diff --git a/excerpt-data/fe2a55106424e727/gnomad.v4.0.sv.chr8.vcf.gz.tbi b/excerpt-data/fe2a55106424e727/gnomad.v4.0.sv.chr8.vcf.gz.tbi index c3cb02d..2e273a7 100644 --- a/excerpt-data/fe2a55106424e727/gnomad.v4.0.sv.chr8.vcf.gz.tbi +++ b/excerpt-data/fe2a55106424e727/gnomad.v4.0.sv.chr8.vcf.gz.tbi @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9073baa6b820ce0b318e81d5d7e9682a615472336222ab4659e4376a8688d8fe -size 235 +oid sha256:1f375aba79bd150d346e029ffcb2a6b512203ea9f58e5eab28d84d66ddefcf50 +size 1998 diff --git a/rules/output/worker/bgdb.smk b/rules/output/worker/bgdb.smk index 1fcf0d0..3caf612 100644 --- a/rules/output/worker/bgdb.smk +++ b/rules/output/worker/bgdb.smk @@ -9,10 +9,10 @@ rule output_worker_bgdb_g1k: spec=f"output/full/worker/bgdb-g1k-grch37-{{version}}+{PV.worker}/bgdb-g1k.spec.yaml", shell: r""" - varfish-server-worker db to-bin \ + varfish-server-worker strucvars txt-to-bin \ --input-type strucvar-g1k \ --path-input {input.bed} \ - --path-output-bin {output.bin} + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/bgdb.spec.yaml \ @@ -40,10 +40,10 @@ rule output_worker_bgdb_exac: spec=f"output/full/worker/bgdb-exac-grch37-{{version}}+{PV.worker}/bgdb-exac.spec.yaml", shell: r""" - varfish-server-worker db to-bin \ + varfish-server-worker strucvars txt-to-bin \ --input-type 
strucvar-exac-cnv \ --path-input {input.bed} \ - --path-output-bin {output.bin} + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/bgdb.spec.yaml \ @@ -63,7 +63,38 @@ rule output_worker_bgdb_exac: """ -rule output_worker_bgdb_gnomad: +rule output_worker_bgdb_gnomad_exomes_cnv_grch38: + input: + bed=f"output/full/tracks/track-strucvars-gnomad-cnv-grch38-{{version}}+{DV.tracks}/gnomad-cnv.bed.gz", + output: + bin=f"output/full/worker/bgdb-gnomad-exomes-cnv-grch38-{{version}}+{PV.worker}/bgdb-gnomad-exomes-cnv-grch38.bin", + spec=f"output/full/worker/bgdb-gnomad-exomes-cnv-grch38-{{version}}+{PV.worker}/bgdb-gnomad-exomes-cnv-grch38.spec.yaml", + shell: + r""" + varfish-server-worker strucvars txt-to-bin \ + --input-type strucvar-gnomad-cnv4 \ + --path-input {input.bed} \ + --path-output {output.bin} + + varfish-db-downloader tpl \ + --template rules/output/worker/bgdb.spec.yaml \ + \ + --value db_name=gnomad-exomes-cnv \ + --value title="gnomAD Exomes CNV" \ + --value creator="gnomAD Consortium" \ + --value source="https://gnomad.broadinstitute.org/downloads#v4-copy-number-variants" \ + \ + --value version={wildcards.version}+{PV.worker} \ + --value today={TODAY} \ + --value genome_release=grch38 \ + \ + --value v_worker={PV.worker} \ + --value v_downloader={PV.downloader} \ + > {output.spec} + """ + + +rule output_worker_bgdb_gnomad_sv_grch37: input: bed=f"output/full/tracks/track-strucvars-gnomad-grch37-{{version}}+{DV.tracks}/gnomad.bed.gz", output: @@ -71,11 +102,10 @@ rule output_worker_bgdb_gnomad: spec=f"output/full/worker/bgdb-gnomad-grch37-{{version}}+{PV.worker}/bgdb-gnomad.spec.yaml", shell: r""" - varfish-server-worker db to-bin \ - --input-type strucvar-gnomad-sv \ + varfish-server-worker strucvars txt-to-bin \ + --input-type strucvar-gnomad-sv2 \ --path-input {input.bed} \ - --path-output-bin {output.bin} - + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/bgdb.spec.yaml \ @@ 
-95,6 +125,37 @@ rule output_worker_bgdb_gnomad: """ +rule output_worker_bgdb_gnomad_genomes_sv_grch38: + input: + bed=f"output/full/tracks/track-strucvars-gnomad-sv-grch38-{{version}}+{DV.tracks}/gnomad-sv.bed.gz", + output: + bin=f"output/full/worker/bgdb-gnomad-genomes-sv-grch38-{{version}}+{PV.worker}/bgdb-gnomad-genomes-sv-grch38.bin", + spec=f"output/full/worker/bgdb-gnomad-genomes-sv-grch38-{{version}}+{PV.worker}/bgdb-gnomad-genomes-sv-grch38.spec.yaml", + shell: + r""" + varfish-server-worker strucvars txt-to-bin \ + --input-type strucvar-gnomad-sv4 \ + --path-input {input.bed} \ + --path-output {output.bin} + + varfish-db-downloader tpl \ + --template rules/output/worker/bgdb.spec.yaml \ + \ + --value db_name=gnomad \ + --value title="gnomAD-SVs" \ + --value creator="gnomAD Consortium" \ + --value source="https://gnomad.broadinstitute.org/downloads#v4-structural-variants" \ + \ + --value version={wildcards.version}+{PV.worker} \ + --value today={TODAY} \ + --value genome_release=grch38 \ + \ + --value v_worker={PV.worker} \ + --value v_downloader={PV.downloader} \ + > {output.spec} + """ + + rule output_worker_bgdb_dbvar: input: bed=f"output/full/tracks/track-strucvars-dbvar-{{genome_release}}-{{version}}+{DV.tracks}/dbvar.bed.gz", @@ -103,10 +164,10 @@ rule output_worker_bgdb_dbvar: spec=f"output/full/worker/bgdb-dbvar-{{genome_release}}-{{version}}+{PV.worker}/bgdb-dbvar.spec.yaml", shell: r""" - varfish-server-worker db to-bin \ + varfish-server-worker strucvars txt-to-bin \ --input-type strucvar-db-var \ --path-input {input.bed} \ - --path-output-bin {output.bin} + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/bgdb.spec.yaml \ @@ -134,10 +195,10 @@ rule output_worker_bgdb_dgv: spec=f"output/full/worker/bgdb-dgv-{{genome_release}}-{{version}}+{PV.worker}/bgdb-dgv.spec.yaml", shell: r""" - varfish-server-worker db to-bin \ + varfish-server-worker strucvars txt-to-bin \ --input-type strucvar-dgv \ --path-input 
{input.bed} \ - --path-output-bin {output.bin} + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/bgdb.spec.yaml \ @@ -165,10 +226,10 @@ rule output_worker_bgdb_dgv_gs: spec=f"output/full/worker/bgdb-dgv-gs-{{genome_release}}-{{version}}+{PV.worker}/bgdb-dgv-gs.spec.yaml", shell: r""" - varfish-server-worker db to-bin \ + varfish-server-worker strucvars txt-to-bin \ --input-type strucvar-dgv-gs \ --path-input {input.bed} \ - --path-output-bin {output.bin} + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/bgdb.spec.yaml \ diff --git a/rules/output/worker/clinvar.smk b/rules/output/worker/clinvar.smk index 3230ab5..3891589 100644 --- a/rules/output/worker/clinvar.smk +++ b/rules/output/worker/clinvar.smk @@ -3,11 +3,13 @@ def input_annos_strucvar_clinvar_convert(wildcards): """Return input files for ``rule annos_strucvar_clinvar_convert``.""" - clinvar_version = wildcards.clinvar_release.replace("-", "").split("+")[0] + assert DV.clinvar_release.startswith( + wildcards.clinvar_release + ), f"DV.clinvar_release={DV.clinvar_release}, wildcards.clinvar_release={wildcards.clinvar_release}" return { - "tsv": ( + "jsonl": ( f"work/download/annos/{wildcards.genome_release}/strucvars/" - f"clinvar/{clinvar_version}/clinvar_strucvar.tsv.gz" + f"clinvar/{DV.clinvar_release}/clinvar-variants-{wildcards.genome_release}-strucvars.jsonl.gz" ) } @@ -23,10 +25,10 @@ rule annos_strucvar_clinvar_convert: clinvar_release=RE_VERSION, shell: r""" - varfish-server-worker db to-bin \ + varfish-server-worker strucvars txt-to-bin \ --input-type clinvar-sv \ - --path-input {input.tsv} \ - --path-output-bin {output.bin} + --path-input {input.jsonl} \ + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/clinvar_strucvars.spec.yaml \ diff --git a/rules/output/worker/genes_regions.smk b/rules/output/worker/genes_regions.smk deleted file mode 100644 index c7cd499..0000000 --- 
a/rules/output/worker/genes_regions.smk +++ /dev/null @@ -1,35 +0,0 @@ -## Convert gene regions to binary for worker. - - -def input_genes_regions_worker_convert(wildcards): - """Input function for rule genes_regions_worker_convert.""" - return { - "bed": f"work/annos/{wildcards.genome_release}/features/{wildcards.source}/{wildcards.version}/{wildcards.source}_genes.bed.gz", - } - - -rule genes_regions_worker_convert: - input: - unpack(input_genes_regions_worker_convert), - output: - bin=f"output/full/worker/genes-regions-{{genome_release}}-{{version}}+{PV.worker}/{{source}}_genes.bin", - spec_yaml=f"output/full/worker/genes-regions-{{genome_release}}-{{version}}+{PV.worker}/{{source}}_genes.spec.yaml", - shell: - r""" - varfish-server-worker \ - db to-bin \ - --input-type gene-region \ - --path-input {input.bed} \ - --path-output-bin {output.bin} - - varfish-db-downloader tpl \ - --template rules/output/worker/{wildcards.source}_genes.spec.yaml \ - --value today={TODAY} \ - --value genome_release={wildcards.genome_release} \ - \ - --value version={wildcards.version} \ - \ - --value v_worker={PV.worker} \ - --value v_downloader={PV.downloader} \ - > {output.spec_yaml} - """ diff --git a/rules/output/worker/hgnc.smk b/rules/output/worker/hgnc.smk index 1babd54..3fd83b9 100644 --- a/rules/output/worker/hgnc.smk +++ b/rules/output/worker/hgnc.smk @@ -14,10 +14,10 @@ rule output_hgnc_xlink_binary: exit 1 fi - varfish-server-worker db to-bin \ + varfish-server-worker strucvars txt-to-bin \ --input-type xlink \ --path-input {input.tsv} \ - --path-output-bin {output.bin} + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/hgnc_xlink.spec.yaml \ diff --git a/rules/output/worker/masked.smk b/rules/output/worker/masked.smk index 4e46f74..263088d 100644 --- a/rules/output/worker/masked.smk +++ b/rules/output/worker/masked.smk @@ -12,10 +12,10 @@ rule output_masked_repeat: version=RE_VERSION, shell: r""" - varfish-server-worker db to-bin \ + 
varfish-server-worker strucvars txt-to-bin \ --input-type masked-region \ --path-input {input} \ - --path-output-bin {output.bin} + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/masked_repeat.spec.yaml \ @@ -41,10 +41,10 @@ rule output_masked_segdup: version=RE_VERSION, shell: r""" - varfish-server-worker db to-bin \ + varfish-server-worker strucvars txt-to-bin \ --input-type masked-region \ --path-input {input} \ - --path-output-bin {output.bin} + --path-output {output.bin} varfish-db-downloader tpl \ --template rules/output/worker/masked_segdup.spec.yaml \ diff --git a/rules/work/annos/strucvars/clinvar.smk b/rules/work/annos/strucvars/clinvar.smk index c852b43..2b148a8 100644 --- a/rules/work/annos/strucvars/clinvar.smk +++ b/rules/work/annos/strucvars/clinvar.smk @@ -3,11 +3,10 @@ rule annos_strucvars_clinvar_download: # -- download/extract ClinVar files output: - tar=f"work/download/annos/{{genome_release}}/strucvars/clinvar/{{clinvar_version}}/clinvar-strucvar-{{genome_release}}-{DV.clinvar_release}.tar.gz", - tsv="work/download/annos/{genome_release}/strucvars/clinvar/{clinvar_version}/clinvar_strucvar.tsv.gz", + jsonl="work/download/annos/{genome_release}/strucvars/clinvar/{clinvar_version}/clinvar-variants-{genome_release}-strucvars.jsonl.gz", wildcard_constraints: genome_release=RE_GENOME, - clinvar_version=RE_VERSION, + clinvar_version=RE_VERSION_MULTI, shell: r""" clinvar_version=$(echo "{wildcards.clinvar_version}" | sed -e 's/-//g' | cut -d '+' -f 1) @@ -16,27 +15,9 @@ rule annos_strucvars_clinvar_download: # -- download/extract ClinVar files trap "rm -rf $TMPDIR" ERR EXIT wget --no-check-certificate \ - -O {output.tar} \ - https://github.com/bihealth/annonars-data-clinvar/releases/download/clinvar-weekly-$clinvar_version/$(basename {output.tar}) + -O $TMPDIR/clinvar-data-extract-vars-{wildcards.clinvar_version}.tar.gz \ +
https://github.com/varfish-org/clinvar-data-jsonl/releases/download/clinvar-weekly-$clinvar_version/clinvar-data-extract-vars-{wildcards.clinvar_version}.tar.gz - if [[ {wildcards.genome_release} == grch37 ]]; then - release=GRCh37 - else - release=GRCh38 - fi - - tar -C $TMPDIR -xvf $(readlink -f {output.tar}) - cut -f 2- $TMPDIR/clinvar-strucvar-{wildcards.genome_release}*/output.tsv \ - | awk \ - -F $'\t' \ - -v release=$release \ - -v clinvar_version={wildcards.clinvar_version} \ - \ - ' - BEGIN {{ OFS=FS }} - (NR == 1) {{ print "#" $0; }} - (NR > 1) {{ $8 = clinvar_version; print $0; }} - ' \ - | gzip -c \ - > {output.tsv} + tar -C $TMPDIR -xvf $TMPDIR/clinvar-data-extract-vars-{wildcards.clinvar_version}.tar.gz + cp $TMPDIR/clinvar-data-extract-vars-{wildcards.clinvar_version}/clinvar-variants-{wildcards.genome_release}-strucvars.jsonl.gz {output.jsonl} """ diff --git a/rules/work/annos/strucvars/gnomad.smk b/rules/work/annos/strucvars/gnomad.smk index 33467f7..6e824fd 100644 --- a/rules/work/annos/strucvars/gnomad.smk +++ b/rules/work/annos/strucvars/gnomad.smk @@ -1,7 +1,7 @@ ## Rules related to gnomAD-SV.
-rule annos_strucvars_gnomad_grch37_download: # -- download gnomAD-SV files +rule annos_strucvars_gnomad_sv2_grch37_download: # -- download gnomAD-SV v2 files output: vcf="work/download/annos/grch37/strucvars/gnomad/2.1.1/gnomad_v2.1_sv.{token}.vcf.gz", shell: @@ -12,7 +12,7 @@ rule annos_strucvars_gnomad_grch37_download: # -- download gnomAD-SV files """ -rule annos_strucvars_gnomad_grch37_process: # -- process gnomAD-SV files +rule annos_strucvars_gnomad_sv2_grch37_process: # -- process gnomAD-SV v2 files input: vcf="work/download/annos/grch37/strucvars/gnomad/2.1.1/gnomad_v2.1_sv.sites.vcf.gz", output: diff --git a/rules/work/annos/strucvars/gnomad_sv4.smk b/rules/work/annos/strucvars/gnomad_sv4.smk index bf7ef03..b47d9b5 100644 --- a/rules/work/annos/strucvars/gnomad_sv4.smk +++ b/rules/work/annos/strucvars/gnomad_sv4.smk @@ -12,6 +12,64 @@ rule annos_strucvars_gnomad_sv_4_grch38_download: # -- download gnomAD-SV 4.0 f """ +rule annos_strucvars_gnomad_sv_4_grch38_process: # -- process gnomAD-SV v4 files + input: + vcf=[ + f"work/download/annos/grch38/strucvars/gnomad_sv/{{version}}/gnomad.v{{version}}.sv.chr{chrom}.vcf.gz" + for chrom in list(range(1, 23)) + ["X", "Y"] + ], + output: + bed=f"output/full/tracks/track-strucvars-gnomad-sv-grch38-{{version}}+{DV.tracks}/gnomad-sv.bed.gz", + bed_md5=f"output/full/tracks/track-strucvars-gnomad-sv-grch38-{{version}}+{DV.tracks}/gnomad-sv.bed.gz.md5", + bed_tbi=f"output/full/tracks/track-strucvars-gnomad-sv-grch38-{{version}}+{DV.tracks}/gnomad-sv.bed.gz.tbi", + bed_tbi_md5=f"output/full/tracks/track-strucvars-gnomad-sv-grch38-{{version}}+{DV.tracks}/gnomad-sv.bed.gz.tbi.md5", + shell: + r""" + export TMPDIR=$(mktemp -d) + trap "rm -rf $TMPDIR" ERR EXIT + + echo -e "#chromosome\tbegin\tend\tsv_type\tmale_n_homref\tmale_n_het\tmale_n_homalt\tmale_n_hemiref\tmale_n_hemialt\tfemale_n_homref\tfemale_n_het\tfemale_n_homalt\tcnv_n_total\tcnv_n_var" \ + > $TMPDIR/tmp.bed + + for vcf in {input.vcf}; do + bcftools query \ + -f
"%CHROM\t%POS0\t%INFO/END\t%INFO/SVTYPE\t%INFO/MALE_N_HOMREF\t%INFO/MALE_N_HET\t%INFO/MALE_N_HOMALT\t%INFO/MALE_N_HEMIREF\t%INFO/MALE_N_HEMIALT\t%INFO/FEMALE_N_HOMREF\t%INFO/FEMALE_N_HET\t%INFO/FEMALE_N_HOMALT\t%CN_NUMBER\t%CN_COUNT\n" \ + $vcf \ + | awk -v OFS='\t' '{{ + if ($5 == ".") {{ $5 = 0; }} + if ($6 == ".") {{ $6 = 0; }} + if ($7 == ".") {{ $7 = 0; }} + if ($8 == ".") {{ $8 = 0; }} + if ($9 == ".") {{ $9 = 0; }} + if ($10 == ".") {{ $10 = 0; }} + if ($11 == ".") {{ $11 = 0; }} + if ($12 == ".") {{ $12 = 0; }} + if ($13 == ".") {{ $13 = 0; }} + if ($14 == ".") {{ + $14 = 0 + }} else {{ + sum = 0 + split($14, a, ",") + for (x in a) {{ + sum += a[x] + }} + $14 = sum + }} + print $0 + }}' \ + | sed -e 's/CPX/BND/g' -e 's/CTX/BND/g' \ + >> $TMPDIR/tmp.bed + done + + bgzip -c $TMPDIR/tmp.bed >{output.bed} + + tabix -p bed -S 1 -f {output.bed} + + md5sum {output.bed} >{output.bed_md5} + md5sum {output.bed_tbi} >{output.bed_tbi_md5} + """ + + rule annos_strucvars_gnomad_cnv_4_grch38_download: # -- download gnomAD-CNV 4.0 files output: vcf="work/download/annos/grch38/strucvars/gnomad_cnv/4.0/gnomad.v4.0.cnv.{token}.vcf.gz", @@ -21,3 +79,35 @@ rule annos_strucvars_gnomad_cnv_4_grch38_download: # -- download gnomAD-CNV 4.0 -O {output.vcf} \ https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/exome_cnv/gnomad.v4.0.cnv.{wildcards.token}.vcf.gz """ + + +rule annos_strucvars_gnomad_cnv_4_grch38_process: # -- process gnomAD-CNV 4.0 files + input: + vcf="work/download/annos/grch38/strucvars/gnomad_cnv/{version}/gnomad.v{version}.cnv.all.vcf.gz", + output: + bed=f"output/full/tracks/track-strucvars-gnomad-cnv-grch38-{{version}}+{DV.tracks}/gnomad-cnv.bed.gz", + bed_md5=f"output/full/tracks/track-strucvars-gnomad-cnv-grch38-{{version}}+{DV.tracks}/gnomad-cnv.bed.gz.md5", + bed_tbi=f"output/full/tracks/track-strucvars-gnomad-cnv-grch38-{{version}}+{DV.tracks}/gnomad-cnv.bed.gz.tbi", +
bed_tbi_md5=f"output/full/tracks/track-strucvars-gnomad-cnv-grch38-{{version}}+{DV.tracks}/gnomad-cnv.bed.gz.tbi.md5", + shell: + r""" + set -euo pipefail + + export TMPDIR=$(mktemp -d) + trap "rm -rf $TMPDIR" ERR EXIT + + echo -e "#chromosome\tbegin\tend\tsv_type\tn_total\tn_var" \ + > $TMPDIR/tmp.bed + + bcftools query \ + -f "%CHROM\t%POS0\t%INFO/END\t%INFO/SVTYPE\t%INFO/SN\t%INFO/SC\n" \ + {input.vcf} \ + >> $TMPDIR/tmp.bed + + bgzip -c $TMPDIR/tmp.bed >{output.bed} + + tabix -p bed -S 1 -f {output.bed} + + md5sum {output.bed} >{output.bed_md5} + md5sum {output.bed_tbi} >{output.bed_tbi_md5} + """ diff --git a/varfish_db_downloader/versions.py b/varfish_db_downloader/versions.py index ababfc2..361723c 100644 --- a/varfish_db_downloader/versions.py +++ b/varfish_db_downloader/versions.py @@ -11,7 +11,7 @@ #: Allow to disable the today check. FORCE_TODAY = os.environ.get("FORCE_TODAY", "false").lower() == "true" #: The ClinVar release to use (includes annonars version used for building). -CLINVAR_RELEASE = os.environ.get("CLINVAR_RELEASE", "2023-0625+0.6.3") +CLINVAR_RELEASE = os.environ.get("CLINVAR_RELEASE", "20240612+0.17.0") #: The ClinVar version to use (part of the tag and release name). CLINVAR_VERSION = CLINVAR_RELEASE.replace("-", "").split("+")[0]