-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcre.topmed.R
76 lines (63 loc) · 2.29 KB
/
cre.topmed.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# http://bioconductor.org/packages/release/data/annotation/html/MafDb.TOPMed.freeze5.hg19.html
# https://bioconductor.org/packages/release/data/annotation/html/SNPlocs.Hsapiens.dbSNP144.GRCh37.html
# http://gnomad.broadinstitute.org/
# https://bravo.sph.umich.edu/freeze3a/hg19/
install <- function(){
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("MafDb.TOPMed.freeze5.hg19", version = "3.8")
BiocManager::install("SNPlocs.Hsapiens.dbSNP144.GRCh37")
}
init <- function(){
library(SNPlocs.Hsapiens.dbSNP144.GRCh37)
library(MafDb.TOPMed.freeze5.hg19)
}
get_af_by_rsid <- function(rs_id){
# DEBUG:
# from Johar 2016
# rs_id <- "rs202193903"
# rs_id = "rs903331232"
# rs_id = "rs1129038"
#- not found
#ls("package:MafDb.TOPMed.freeze5.hg19")
mafdb <- MafDb.TOPMed.freeze5.hg19
#mafdb
#citation(mafdb)
#populations(mafdb)
## lookup allele frequencies for rs1129038, a SNP associated to blue and brown eye colors
## as reported by Eiberg et al. Blue eye color in humans may be caused by a perfectly associated
## founder mutation in a regulatory element located within the HERC2 gene inhibiting OCA2 expression.
## Human Genetics, 123(2):177-87, 2008 [http://www.ncbi.nlm.nih.gov/pubmed/18172690]
snpdb <- SNPlocs.Hsapiens.dbSNP144.GRCh37
#print(rs_id)
rng <- snpsById(snpdb, ids = rs_id, ifnotfound = "drop")
#rng
seqlevelsStyle(rng) <- seqlevelsStyle(mafdb)
if (length(rng) > 0){
scores <- gscores(mafdb, rng)
result <- scores$AF
}else{
result <- NA
}
return(result)
#gscores(mafdb, GRanges("chr15:28356859"))
}
check_hgmd <- function(){
setwd("~/Desktop/work")
hgmd <- read.csv("hgmd.csv", header = T, stringsAsFactors = F)
print("Total variants")
print(nrow(hgmd))
hgmd$AF <- NULL
hgmd <- hgmd[hgmd$tag == "DM" | hgmd$tag == "DM?",]
for (i in 1:nrow(hgmd)){
print(i)
hgmd[i,"AF"] <- af_by_rsid(hgmd[i, "dbsnp"])
}
write.csv(hgmd, "hgmd_with_topmed.csv", row.names = F)
hgmd <- hgmd[!is.na(hgmd$AF),]
print("With AF:")
print(nrow(hgmd))
hgmd <- hgmd[hgmd$AF > 0.01,]
print("AF > 1%")
print(nrow(hgmd))
}