-
Notifications
You must be signed in to change notification settings - Fork 0
/
R_Meets_Uniprot.R
76 lines (53 loc) · 1.85 KB
/
R_Meets_Uniprot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#R_Meets_Uniprot Version 1.0
#Written by SeyyedAmirreza Mousavi Majd
#Biostrings package must be installed prior to use.
####Initialization
# Attach Biostrings (a Bioconductor package); it must already be installed.
library(Biostrings)
# Every file this script downloads is saved under a relative name, so it ends
# up in this directory. The path is specific to the author's machine: edit it
# before running elsewhere, because setwd() errors if the directory is missing.
setwd("C:/Users/Amirreza/Desktop/Search_Results/")
####Defining searchEBI function
# Query the EBI Search REST service and return the "main" URL of every hit.
#
# Arguments:
#   Bio_database  EBI Search domain placed in the request path
#                 (default "uniprotkb").
#   Query         Search term. It is percent-encoded before being placed in
#                 the request URL, unless it already looks percent-encoded.
#   Result_size   Value sent as the "size" URL parameter (default 20).
#
# Side effect:
#   The raw XML response is saved in the working directory as
#   "Uniprot_query_at_<timestamp>.txt".
#
# Returns:
#   A character vector holding the text of every <fieldURL name="main">
#   element in the response, in document order; character(0) if there is none.
#
# Errors:
#   Stops if download.file() reports a non-zero status.
searchEBI <- function(Bio_database = "uniprotkb",
                      Query = "OCT4",
                      Result_size = 20) {
  # Build the name of the file that keeps the raw response; ':' and '-' coming
  # from the timestamp are replaced by '_'.
  dest_file_location <- paste0(
    "Uniprot_query_at_", as.character(Sys.time()), ".txt"
  )
  dest_file_location <- gsub("[:-]", "_", dest_file_location)

  ####Download an XML file, containing data for the query, saved as text
  # Encoding the query keeps spaces, '&', '#', ... from corrupting the URL.
  # format(..., scientific = FALSE) avoids sending e.g. "1e+05" as the size.
  request_url <- paste0(
    "https://www.ebi.ac.uk/ebisearch/ws/rest/", Bio_database,
    "?query=", utils::URLencode(as.character(Query), reserved = TRUE),
    "&size=", format(Result_size, scientific = FALSE, trim = TRUE),
    "&fieldurl=true&format=xml"
  )
  status <- download.file(url = request_url, destfile = dest_file_location)
  if (!identical(as.integer(status), 0L)) {
    stop("EBI Search request failed (download.file status ", status, "): ",
         request_url, call. = FALSE)
  }

  ####parse the txt file, to retrieve URL for each result
  # Join all lines so the parse does not depend on how the server wraps the
  # XML, then match each complete "main" element. Matching the opening and
  # closing tag together means fieldURL elements with other names cannot be
  # paired with the wrong closing tag.
  response <- paste(readLines(dest_file_location, warn = FALSE), collapse = "")
  main_tag <- '<fieldURL name="main">'
  elements <- regmatches(
    response,
    gregexpr(paste0(main_tag, "[^<]*</fieldURL>"), response)
  )[[1]]
  urls <- sub(main_tag, "", elements, fixed = TRUE)
  urls <- sub("</fieldURL>", "", urls, fixed = TRUE)
  # '&' is written as '&amp;' inside XML text; restore it so the URL is usable.
  urls <- gsub("&amp;", "&", urls, fixed = TRUE)

  ####Return the URLs of the query results
  trimws(urls)
}
####Defining storeFasta function
# Download the FASTA file behind one entry URL into the working directory.
#
# Arguments:
#   url_string  A single entry URL, e.g. one element of what searchEBI()
#               returns. ".fasta" is appended to it to form the download
#               address.
#   sleep_time  Seconds to pause before the request (default 0.01).
#
# Side effects:
#   Writes "<last path component of url_string>.fasta" in the working
#   directory and prints a one-line confirmation.
#
# Returns:
#   The name of the written file, invisibly.
#
# Errors:
#   Stops if url_string is not a single non-empty string, or if
#   download.file() reports a non-zero status.
storeFasta <- function(url_string, sleep_time = 0.01) {
  if (!is.character(url_string) || length(url_string) != 1L ||
      is.na(url_string) || !nzchar(url_string)) {
    stop("'url_string' must be a single non-empty URL string", call. = FALSE)
  }
  Sys.sleep(sleep_time)

  # The file is named after the last path component of the URL. For URLs of
  # the form "https://www.uniprot.org/uniprot/<id>" this equals dropping the
  # first 32 characters, without depending on the exact prefix length.
  filename_fa <- paste0(basename(url_string), ".fasta")
  fetch_address <- paste0(url_string, ".fasta")

  status <- download.file(fetch_address, filename_fa)
  if (!identical(as.integer(status), 0L)) {
    stop("Download failed (download.file status ", status, "): ",
         fetch_address, call. = FALSE)
  }

  # Only report success once the status has been checked.
  cat(filename_fa, " has successfully been downloaded from ",
      fetch_address, "\n", sep = "")
  invisible(filename_fa)
}
####EXAMPLE!
# Search UniProtKB for "OCT4" (up to 20 hits) and download the FASTA file of
# every hit. The loop runs over the URLs actually returned, so a search that
# yields fewer than 20 hits does not pass a missing value to storeFasta().
S <- searchEBI("uniprotkb", "OCT4", 20)
for (i in seq_along(S)) {
  storeFasta(S[i])
}