-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: build annonars regions (clingen dosage) (#67)
- Loading branch information
Showing
11 changed files
with
183 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3 changes: 3 additions & 0 deletions
3
excerpt-data/98935d27cc8f0dc0/GCF_000001405.40_GRCh38.p14_genomic.gff.gz
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
excerpt-data/f0ed4b0862f1b46b/GCF_000001405.25_GRCh37.p13_genomic.gff.gz
Git LFS file not shown
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
## Rules to build the annonars functional annotation database.
|
||
|
||
# Download the RefSeq annotation GFF used for the GRCh37 functional database.
# The `version` and `assembly` wildcards are taken from the requested output
# path and substituted into the NCBI annotation-release URL.
rule work_annonars_functional_download_37: # -- download functional data for GRCh37
    output:
        "work/download/refseq/grch37/{version}/{assembly}_genomic.gff.gz",
    shell:
        r"""
        wget -O {output} \
            https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/{wildcards.version}/{wildcards.assembly}/{wildcards.assembly}_genomic.gff.gz
        """
|
||
|
||
# Download the RefSeq annotation GFF used for the GRCh38 functional database.
# The `version` and `assembly` wildcards are taken from the requested output
# path and substituted into the NCBI annotation-release URL.
rule work_annonars_functional_download_38: # -- download functional data for GRCh38
    output:
        "work/download/refseq/grch38/{version}/{assembly}_genomic.gff.gz",
    shell:
        r"""
        wget -O {output} \
            https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/{wildcards.version}/{wildcards.assembly}/{wildcards.assembly}_genomic.gff.gz
        """
|
||
|
||
def output_annonars_functional_input(wildcards):
    """Return the downloaded RefSeq GFF path for the requested genome release.

    ``wildcards.genome_release == "grch37"`` selects the GRCh37.p13 file under
    the ``DV.refseq_fe_37`` version directory; any other value falls through
    to the GRCh38.p14 file under ``DV.refseq_fe_38``.
    """
    wants_grch37 = wildcards.genome_release == "grch37"
    if not wants_grch37:
        return f"work/download/refseq/grch38/{DV.refseq_fe_38}/GCF_000001405.40_GRCh38.p14_genomic.gff.gz"
    return f"work/download/refseq/grch37/{DV.refseq_fe_37}/GCF_000001405.25_GRCh37.p13_genomic.gff.gz"
|
||
|
||
# Build the annonars "functional" RocksDB from the RefSeq GFF and write the
# accompanying spec.yaml.  The input path is chosen per genome release by
# output_annonars_functional_input().
rule output_annonars_functional: # -- build annonars functional RocksDB file
    input:
        output_annonars_functional_input,
    output:
        rocksdb_identity=(
            "output/full/annonars/functional-{genome_release}-{v_refseq}+{v_annonars}/"
            "rocksdb/IDENTITY"
        ),
        spec_yaml=(
            "output/full/annonars/functional-{genome_release}-{v_refseq}+{v_annonars}/spec.yaml"
        ),
    wildcard_constraints:
        v_refseq=RE_VERSION,
        v_annonars=RE_VERSION,
    shell:
        r"""
        # Scratch directory for the filtered GFF; removed when the shell exits.
        export TMPDIR=$(mktemp -d)
        trap "rm -rf $TMPDIR" EXIT

        # Keep only header lines and RefSeqFE records.
        zgrep '^#\|RefSeqFE' {input} > $TMPDIR/tmp.gff

        annonars functional import -vvv \
            --genome-release {wildcards.genome_release} \
            --path-in-gff $TMPDIR/tmp.gff \
            --path-out-rocksdb $(dirname {output.rocksdb_identity})

        # The spec template references genome_release, so it must be passed
        # along with the version values.
        varfish-db-downloader tpl \
            --template rules/output/annonars/functional.spec.yaml \
            --value today={TODAY} \
            --value genome_release={wildcards.genome_release} \
            \
            --value version={wildcards.v_refseq}+{wildcards.v_annonars} \
            --value v_refseq={wildcards.v_refseq} \
            \
            --value v_annonars={wildcards.v_annonars} \
            --value v_downloader={PV.downloader} \
        > {output.spec_yaml}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Spec template for the annonars functional elements RocksDB.
# Placeholders: version, genome_release, today, v_refseq.
# NOTE(review): presumably rendered via `varfish-db-downloader tpl --value ...`
# from the rule that builds the functional database -- confirm the caller
# supplies every placeholder listed above.
dc.identifier: annonars/functional:{{ version }}-{{ genome_release }}
dc.title: annonars functional elements RocksDB
dc.creator: VarFish Developer Teams
dc.format: application/x-rocksdb
dc.date: {{ today }}
x-version: {{ version }}
x-genome-release: {{ genome_release }}
dc.description: |
  RocksDB built from RefSeq Functional Elements (and other sources in
  the future).
dc.source:
  - PMID:34876495
  - https://www.ncbi.nlm.nih.gov/refseq/
x-created-from:
  - name: RefSeq Functional Elements
    version: {{ v_refseq }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
## Rules to build the annonars regions annotation database.
|
||
|
||
# Download the ClinGen region curation list for one genome release.
# The `genome_release` wildcard (lower case) is mapped to the capitalisation
# used in the ClinGen file name; `today` only namespaces the download
# directory.
rule work_annonars_regions_download: # -- download clingen regions
    output:
        "work/download/clingen/{genome_release}/{today}/ClinGen_region_curation_list_{genome_release}.tsv",
    shell:
        r"""
        # grch37 -> GRCh37; anything else is treated as GRCh38.
        if [[ "{wildcards.genome_release}" == "grch37" ]]; then
            GENOME=GRCh37
        else
            GENOME=GRCh38
        fi

        wget -O {output} \
            ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_$GENOME.tsv
        """
|
||
|
||
# Build the annonars "regions" RocksDB from the downloaded ClinGen region
# curation list and write the accompanying spec.yaml.
rule output_annonars_regions: # -- build annonars regions RocksDB file
    input:
        "work/download/clingen/{genome_release}/{date}/ClinGen_region_curation_list_{genome_release}.tsv",
    output:
        rocksdb_identity=(
            "output/full/annonars/regions-{genome_release}-{date}+{v_annonars}/" "rocksdb/IDENTITY"
        ),
        spec_yaml=("output/full/annonars/regions-{genome_release}-{date}+{v_annonars}/spec.yaml"),
    wildcard_constraints:
        # `date` is validated against the current day in the shell block below.
        v_annonars=RE_VERSION,
    shell:
        r"""
        # The download is not versioned upstream, so refuse to label it with a
        # date other than today unless FORCE_TODAY is set.
        if [[ "$(date +%Y%m%d)" != "{wildcards.date}" ]] && [[ "{FORCE_TODAY}" != "True" ]]; then
            >&2 echo "{wildcards.date} is not today"
            exit 1
        fi

        annonars regions import -vvv \
            --genome-release {wildcards.genome_release} \
            --path-in-clingen {input} \
            --path-out-rocksdb $(dirname {output.rocksdb_identity})

        # The spec template references genome_release, so it must be passed
        # along with the version values.
        varfish-db-downloader tpl \
            --template rules/output/annonars/regions.spec.yaml \
            --value today={TODAY} \
            --value genome_release={wildcards.genome_release} \
            \
            --value version={wildcards.date}+{wildcards.v_annonars} \
            \
            --value v_annonars={wildcards.v_annonars} \
            --value v_downloader={PV.downloader} \
        > {output.spec_yaml}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Spec template for the annonars regions annotation RocksDB.
# Placeholders: version, genome_release, today.
# NOTE(review): presumably rendered via `varfish-db-downloader tpl --value ...`
# from the rule that builds the regions database -- confirm the caller
# supplies every placeholder listed above.
dc.identifier: annonars/regions:{{ version }}-{{ genome_release }}
dc.title: annonars regions annotation RocksDB
dc.creator: VarFish Developer Teams
dc.format: application/x-rocksdb
dc.date: {{ today }}
x-version: {{ version }}
x-genome-release: {{ genome_release }}
dc.description: |
  RocksDB with region annotation.
dc.source:
  - https://search.clinicalgenome.org/kb/gene-dosage
x-created-from:
  - name: ClinGen Region Dosage Pathogenicity
    version: {{ today }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters