-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenbankrename_bylocus.R
49 lines (36 loc) · 1.31 KB
/
genbankrename_bylocus.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
library(stringr)
# Rename the FASTA records of one locus according to a key table, drop records
# with too little sequence data, strip alignment gaps, and write the result to
# the file "temp".
#
# Inputs (read from the working directory):
#   key  - tab-separated table. Column 1 is compared against whole lines of the
#          FASTA file; column 2 is R code that is evaluated to produce the
#          replacement line.
#   name - file whose content is the path of the FASTA file to process.
# Output:
#   temp - alternating header / sequence lines, one per row, unquoted.

# Minimum number of characters other than "-" and "?" that a sequence must
# contain for its record to be kept.
min_informative <- 50

key <- as.matrix(read.table("key", sep = "\t"))
fastafile <- as.matrix(read.table("name", sep = "\t", quote = ""))
intable <- as.matrix(read.table(fastafile, sep = "\t"))

# fixed = TRUE so that "." is a literal dot rather than a regex wildcard.
# Not used directly below; it stays in scope because the key's R code is
# evaluated in this environment (presumably it refers to `ucelocus` - confirm
# against the key file).
ucelocus <- gsub(".fasta", "", fastafile, fixed = TRUE)

# --- Collapse multi-line FASTA records into header / sequence pairs ----------
fasta_lines <- as.character(intable[, 1])

# Any line containing ">" starts a new record; the first line is always
# treated as a header.
is_header <- grepl(">", fasta_lines, fixed = TRUE)
is_header[1] <- TRUE
record_id <- cumsum(is_header)
headers <- fasta_lines[is_header]

# Group sequence lines by the record they belong to. Using explicit factor
# levels keeps records that have no sequence lines, so they become "" instead
# of silently breaking the header / sequence alternation.
sequence_parts <- split(
  fasta_lines[!is_header],
  factor(record_id[!is_header], levels = seq_along(headers))
)
sequences <- toupper(vapply(
  sequence_parts, paste, character(1),
  collapse = "", USE.NAMES = FALSE
))

# One-column matrix: odd rows are headers, even rows are sequences.
tempfile <- matrix(rbind(headers, sequences), ncol = 1)

# --- Apply the renaming key ---------------------------------------------------
# NOTE(review): eval(parse()) executes arbitrary R code taken from the key
# file. It is kept for compatibility with existing key files; only run this
# script on key files you trust.
for (j in seq_len(nrow(key))) {
  tempfile[which(tempfile[, 1] == key[j, 1]), 1] <- eval(parse(text = key[j, 2]))
}

# --- Drop records with too little data ----------------------------------------
sequence_rows <- seq_len(nrow(tempfile) %/% 2) * 2
# Count characters that are neither gaps ("-") nor missing data ("?").
informative <- nchar(gsub("[-?]", "", tempfile[sequence_rows, 1]))
keep_record <- informative >= min_informative

# Each record occupies two consecutive rows; drop = FALSE keeps the matrix
# shape even when zero or one record survives.
removingmissing <- tempfile[rep(keep_record, each = 2), , drop = FALSE]

# --- Strip alignment gaps from the surviving sequences ------------------------
# Only "-" is removed here; "?" characters stay in the written sequences.
kept_sequence_rows <- seq_len(nrow(removingmissing) %/% 2) * 2
removingmissing[kept_sequence_rows, 1] <-
  gsub("-", "", removingmissing[kept_sequence_rows, 1], fixed = TRUE)

outputsequence <- removingmissing
write.table(
  outputsequence, "temp",
  quote = FALSE, row.names = FALSE, col.names = FALSE
)