Skip to content

Commit

Permalink
feat: upgrade dbNSFP to v4.5 (#77) (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Jan 1, 2024
1 parent 53e4209 commit 62d2f9a
Show file tree
Hide file tree
Showing 10 changed files with 171 additions and 27 deletions.
30 changes: 16 additions & 14 deletions download_urls.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# dbNSFP v4.5a
- url: https://dbnsfp.s3.amazonaws.com/dbNSFP4.5a.zip
excerpt_strategy:
strategy: manual
count: null
# dbNSFP v4.5c
- url: https://dbnsfp.s3.amazonaws.com/dbNSFP4.5c.zip
excerpt_strategy:
strategy: manual
count: null
- url: ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/dbscSNV1.1.zip
skip_upstream_check: true # does not work reliably in tests
excerpt_strategy:
strategy: manual
count: null

- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg19.tsv.gz
- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg38.tsv.gz
- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_gene_hg38.tsv.gz
Expand Down Expand Up @@ -168,20 +184,6 @@
- url: https://kircherlab.bihealth.org/download/CADD/v1.6/GRCh38/gnomad.genomes.r3.0.indel_inclAnno.tsv.gz
- url: https://kircherlab.bihealth.org/download/CADD/v1.6/GRCh38/gnomad.genomes.r3.0.indel_inclAnno.tsv.gz.tbi

- url: https://usf.box.com/shared/static/bvfzmkpgtphvbmmrvb2iyl2jl21o49kc
excerpt_strategy:
strategy: manual
count: null
- url: https://usf.box.com/shared/static/a84zcdlkx2asq2nxh6xr2gdb4csmyvhk
excerpt_strategy:
strategy: manual
count: null
- url: ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/dbscSNV1.1.zip
skip_upstream_check: true # does not work reliably in tests
excerpt_strategy:
strategy: manual
count: null

- url: https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
excerpt_strategy:
strategy: no-excerpt
Expand Down
3 changes: 3 additions & 0 deletions excerpt-data/8bad0386c4cd562f/dbNSFP4.5c.zip
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/8bad0386c4cd562f/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/b920970c3e3329fb/dbNSFP4.5a.zip
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/b920970c3e3329fb/url.txt
Git LFS file not shown
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,82 @@
"name": "VARITY_ER_LOO_rankscore",
"typ": "Float"
},
{
"name": "ESM1b_score",
"typ": "String"
},
{
"name": "ESM1b_rankscore",
"typ": "Float"
},
{
"name": "ESM1b_pred",
"typ": "String"
},
{
"name": "EVE_score",
"typ": "String"
},
{
"name": "EVE_rankscore",
"typ": "Float"
},
{
"name": "EVE_Class10_pred",
"typ": "String"
},
{
"name": "EVE_Class20_pred",
"typ": "String"
},
{
"name": "EVE_Class25_pred",
"typ": "String"
},
{
"name": "EVE_Class30_pred",
"typ": "String"
},
{
"name": "EVE_Class40_pred",
"typ": "String"
},
{
"name": "EVE_Class50_pred",
"typ": "String"
},
{
"name": "EVE_Class60_pred",
"typ": "String"
},
{
"name": "EVE_Class70_pred",
"typ": "String"
},
{
"name": "EVE_Class75_pred",
"typ": "String"
},
{
"name": "EVE_Class80_pred",
"typ": "String"
},
{
"name": "EVE_Class90_pred",
"typ": "String"
},
{
"name": "AlphaMissense_score",
"typ": "String"
},
{
"name": "AlphaMissense_rankscore",
"typ": "Float"
},
{
"name": "AlphaMissense_pred",
"typ": "String"
},
{
"name": "Aloft_Fraction_transcripts_affected",
"typ": "String"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,70 @@
"name": "VARITY_ER_LOO_rankscore",
"typ": "Float"
},
{
"name": "ESM1b_score",
"typ": "String"
},
{
"name": "ESM1b_rankscore",
"typ": "Float"
},
{
"name": "ESM1b_pred",
"typ": "String"
},
{
"name": "EVE_score",
"typ": "String"
},
{
"name": "EVE_rankscore",
"typ": "Float"
},
{
"name": "EVE_Class10_pred",
"typ": "String"
},
{
"name": "EVE_Class20_pred",
"typ": "String"
},
{
"name": "EVE_Class25_pred",
"typ": "String"
},
{
"name": "EVE_Class30_pred",
"typ": "String"
},
{
"name": "EVE_Class40_pred",
"typ": "String"
},
{
"name": "EVE_Class50_pred",
"typ": "String"
},
{
"name": "EVE_Class60_pred",
"typ": "String"
},
{
"name": "EVE_Class70_pred",
"typ": "String"
},
{
"name": "EVE_Class75_pred",
"typ": "String"
},
{
"name": "EVE_Class80_pred",
"typ": "String"
},
{
"name": "EVE_Class90_pred",
"typ": "String"
},
{
"name": "Aloft_Fraction_transcripts_affected",
"typ": "String"
Expand Down
2 changes: 1 addition & 1 deletion rules/output/annonars/dbnsfp.smk
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ rule output_annonars_dbnsfp: # -- build dbNSFP RocksDB with annonars
--genome-release {wildcards.genome_release} \
--null-values=. \
--inference-row-count 100000 \
--path-schema-json rules/output/annonars/dbnsfp-schema-{wildcards.v_dbnsfp}.json \
\
--path-out-rocksdb $(dirname {output.rocksdb_identity}) \
--path-schema-json rules/output/annonars/dbnsfp-schema-{wildcards.v_dbnsfp}.json \
\
$(if [[ "{wildcards.genome_release}" == "grch37" ]]; then \
echo --col-chrom 'hg19_chr'; \
Expand Down
12 changes: 1 addition & 11 deletions rules/work/annos/seqvars/dbnsfp.smk
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
## Rules related to dbNSFP.


#: Download URL for dbNSFP 4.4a
DBNSFP_ACADEMIC_URL = "https://usf.box.com/shared/static/bvfzmkpgtphvbmmrvb2iyl2jl21o49kc"
#: Download URL for dbNSFP 4.4c
DBNSFP_COMMMERCIAL_URL = "https://usf.box.com/shared/static/a84zcdlkx2asq2nxh6xr2gdb4csmyvhk"


def files_dbnsfp():
"""Helper that returns the files within the dbNSFP archive."""
lst = [
Expand Down Expand Up @@ -60,11 +54,7 @@ rule annos_seqvars_dbnsfp_download: # -- download dbNSFP ZIP file
threads: 8
shell:
r"""
if [[ "{wildcards.variant}" == a ]]; then
url={DBNSFP_ACADEMIC_URL}
else
url={DBNSFP_COMMMERCIAL_URL}
fi
url=https://dbnsfp.s3.amazonaws.com/dbNSFP4.5{wildcards.variant}.zip
aria2c \
--check-certificate=false \
Expand Down
2 changes: 1 addition & 1 deletion varfish_db_downloader/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class DataVersions:
ensembl_38="109",
ensembl="110",
today=TODAY,
dbnsfp="4.4",
dbnsfp="4.5",
dbscsnv="1.1",
cadd="1.6",
gnomad_constraints="4.0",
Expand Down

0 comments on commit 62d2f9a

Please sign in to comment.