forked from tabdelaal/scRNAseq_Benchmark
-
Notifications
You must be signed in to change notification settings - Fork 1
/
evaluate.R
77 lines (59 loc) · 2.61 KB
/
evaluate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#' Evaluate the performance of a classifier from true and predicted labels.
#'
#' Reads both label files, builds the confusion matrix and derives the
#' per-class F1-score, the median F1-score, the accuracy, the fraction of
#' unlabeled cells and the population size of every true class.
#'
#' A cell counts as unlabeled when its predicted label is one of
#' 'unassigned', 'Unassigned', 'Unknown', 'unknown', 'rand', 'ambiguous' or
#' 'Node'. Before that check, every occurrence of 'Node' followed by two
#' arbitrary characters inside a predicted label is collapsed to 'Node'
#' (e.g. 'Node12' becomes 'Node').
#'
#' @param TrueLabelsPath Path to a csv file with the true labels (one column,
#'   no index). It is read with the `read.csv` defaults, so the first line is
#'   treated as the header.
#' @param PredLabelsPath Path to a csv file with the predicted labels (same
#'   format as `TrueLabelsPath`).
#' @param Indices Optional index vector used as-is to subset both label
#'   vectors, e.g. when several datasets are stored in the same file. Pass the
#'   full range (`begin:end`); a two-element `c(begin, end)` selects only
#'   those two cells.
#'
#' @return A list with:
#'   \item{Conf}{confusion matrix of true vs. original predicted labels}
#'   \item{MedF1}{median of the per-class F1-scores}
#'   \item{F1}{F1-score per true class; 0 when the class is never predicted
#'     correctly or is itself one of the unlabeled markers}
#'   \item{Acc}{accuracy over the cells that are not excluded as unlabeled}
#'   \item{PercUnl}{fraction (0-1) of cells whose prediction is unlabeled}
#'   \item{PopSize}{number of cells per true class (classes with > 0 cells)}
evaluate <- function(TrueLabelsPath, PredLabelsPath, Indices = NULL) {
  # Predicted labels that mean "no prediction was made".
  unlabeled <- c("unassigned", "Unassigned", "Unknown", "rand", "Node",
                 "ambiguous", "unknown")

  true_lab <- unlist(read.csv(TrueLabelsPath))
  pred_lab <- unlist(read.csv(PredLabelsPath))

  if (!is.null(Indices)) {
    true_lab <- true_lab[Indices]
    pred_lab <- pred_lab[Indices]
  }

  # Full confusion matrix, built from the untouched predictions.
  conf <- table(true_lab, pred_lab)
  pop_size <- rowSums(conf)
  pop_size <- pop_size[pop_size > 0]
  classes <- names(pop_size)

  # Second confusion matrix without the unlabeled markers; it is the basis
  # for precision, recall and accuracy.
  pred_lab <- gsub("Node..", "Node", pred_lab)
  conf_F1 <- table(true_lab, pred_lab, exclude = unlabeled)
  row_totals <- rowSums(conf_F1)
  col_totals <- colSums(conf_F1)

  F1 <- numeric(length(classes))
  names(F1) <- classes
  sum_acc <- 0

  # Look every class up by name instead of by position: conf_F1 can have
  # fewer rows than conf (a true label that is itself an unlabeled marker is
  # dropped) or extra empty rows (unused factor levels), so positional
  # indexing would misalign or fail.
  for (k in seq_along(classes)) {
    row_idx <- match(classes[k], rownames(conf_F1))
    col_idx <- match(classes[k], colnames(conf_F1))
    if (is.na(row_idx) || is.na(col_idx)) {
      next  # class never predicted (or excluded): F1 stays 0
    }

    hits <- conf_F1[row_idx, col_idx]
    # hits > 0 guarantees both totals are > 0, so no 0/0 can occur here.
    if (hits > 0) {
      prec <- hits / col_totals[col_idx]
      rec <- hits / row_totals[row_idx]
      F1[k] <- (2 * prec * rec) / (prec + rec)
    }
    sum_acc <- sum_acc + hits
  }

  med_F1 <- median(F1)

  # %in% never yields NA, so a missing prediction does not poison the count.
  num_unlab <- sum(pred_lab %in% unlabeled)
  per_unlab <- num_unlab / length(pred_lab)

  acc <- sum_acc / sum(conf_F1)

  list(Conf = conf, MedF1 = med_F1, F1 = F1, Acc = acc,
       PercUnl = per_unlab, PopSize = pop_size)
}