-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTCGA_AUC.R
121 lines (106 loc) · 4.42 KB
/
TCGA_AUC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
library(here)
library(ROCR)
for (i in c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")) {
assign(paste0("pvals.", i),
read.csv(here("Results/TCGA paired data results May 2020",
paste0("pvals.", i, ".csv")), row.names=1))
}
for (i in c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")) {
assign(paste0("genes.", i),
readRDS(here("Data sources/Cancer-related genes", paste0(i, "_genes_info.rds"))))
assign(paste0(i, "_related"),
rownames(get(paste0("pvals.", i))) %in% get(paste0("genes.", i))$ENSEMBL)
rm(list=paste0("genes.", i))
}
aucs <- as.data.frame(matrix(nrow=25, ncol=8), row.names=names(pvals.brca))
names(aucs) <- c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")
for (i in names(aucs)) {
for (j in rownames(aucs)) {
NAs <- which(is.na(get(paste0("pvals.", i))[[j]]))
if (length(NAs) > 0) {
pred <- prediction(1 - get(paste0("pvals.", i))[[j]][-NAs], get(paste0(i, "_related"))[-NAs])
}
else {
pred <- prediction(1 - get(paste0("pvals.", i))[[j]], get(paste0(i, "_related")))
}
if (grepl("prob", j)) {
aucs[i][j,] <- 1 - performance(pred, "auc")@y.values[[1]]
}
else {
aucs[i][j,] <- performance(pred, "auc")@y.values[[1]]
}
}
}
boxplot(t(aucs))
rowMeans(aucs)
write.csv(aucs, file=here("Results/TCGA paired data results May 2020", "AUCs.weak_known_genes.csv"))
# Long chains (original ones, 3500 runs, August 2020)
for (i in c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")) {
assign(paste0("pvals.", i),
read.csv(here("Results/TCGA long chain results Aug 2020",
paste0("pvals.", i, ".csv")), row.names=1))
}
for (i in c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")) {
assign(paste0("genes.", i),
readRDS(here("Data sources/Cancer-related genes", paste0(i, "_genes_info.rds"))))
assign(paste0(i, "_related"),
rownames(get(paste0("pvals.", i))) %in% get(paste0("genes.", i))$ENSEMBL)
rm(list=paste0("genes.", i))
}
aucs <- as.data.frame(matrix(nrow=11, ncol=8), row.names=names(pvals.brca))
names(aucs) <- c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")
for (i in names(aucs)) {
for (j in rownames(aucs)) {
NAs <- which(is.na(get(paste0("pvals.", i))[[j]]))
if (length(NAs) > 0) {
pred <- prediction(1 - get(paste0("pvals.", i))[[j]][-NAs], get(paste0(i, "_related"))[-NAs])
}
else {
pred <- prediction(1 - get(paste0("pvals.", i))[[j]], get(paste0(i, "_related")))
}
if (grepl("prob", j)) {
aucs[i][j,] <- 1 - performance(pred, "auc")@y.values[[1]]
}
else {
aucs[i][j,] <- performance(pred, "auc")@y.values[[1]]
}
}
}
boxplot(t(aucs))
rowMeans(aucs)
write.csv(aucs, file=here("Results/TCGA long chain results Aug 2020", "AUCs.weak_known_genes.csv"))
# Longer chains (two times 3500 runs, September 2020)
for (i in c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")) {
assign(paste0("pvals.", i),
read.csv(here("Results/TCGA long chain results Aug 2020",
paste0("pvals.", i, "_20k.csv")), row.names=1))
}
for (i in c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")) {
assign(paste0("genes.", i),
readRDS(here("Data sources/Cancer-related genes", paste0(i, "_genes_info.rds"))))
assign(paste0(i, "_related"),
rownames(get(paste0("pvals.", i))) %in% get(paste0("genes.", i))$ENSEMBL)
rm(list=paste0("genes.", i))
}
aucs <- as.data.frame(matrix(nrow=11, ncol=8), row.names=names(pvals.brca))
names(aucs) <- c("brca", "kirc", "thca", "luad", "lihc", "lusc", "prad", "coad")
for (i in names(aucs)) {
for (j in rownames(aucs)) {
NAs <- which(is.na(get(paste0("pvals.", i))[[j]]))
if (length(NAs) > 0) {
pred <- prediction(1 - get(paste0("pvals.", i))[[j]][-NAs], get(paste0(i, "_related"))[-NAs])
}
else {
pred <- prediction(1 - get(paste0("pvals.", i))[[j]], get(paste0(i, "_related")))
}
if (grepl("prob", j)) {
aucs[i][j,] <- 1 - performance(pred, "auc")@y.values[[1]]
}
else {
aucs[i][j,] <- performance(pred, "auc")@y.values[[1]]
}
}
}
boxplot(t(aucs))
rowMeans(aucs)
write.csv(aucs, file=here("Results/TCGA long chain results Aug 2020", "AUCs.weak_known_genes_20k.csv"))