-
Notifications
You must be signed in to change notification settings - Fork 0
/
6_FDR.R
executable file
·110 lines (87 loc) · 4.04 KB
/
6_FDR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# This program will adjust the p-values from 5_sig_pcalc_parts.R for multiple comparisons using FDR
# It will read the different files (parts) and will create a single file with adjusted p-value
# for now manual modifications need to be done if there is only one part, specially if the number of that part is different to 1
# This script will need to be run only once no matter how many parts were used to split the dictionary
# "Idx0.Beg" and "Idx0.End" are the beginning and ending positions in the CGH file for the sections (0 start)
# INPUT
# p-value files, if the data set was called SET they must be in ~Research/Results/SET/significance/pvals
# pvalue file names look like B0_sim_SET_pvals_1.txt (sim=phephenotype, SET=dataset and 1=part number)
# Notice that the Parts Argument refers to the TOTAL number of parts
# OUTPUT
# 2 files: one ending in xxx_FDR.txt with all the sections and their corresponding False Dsicovery Rate (FDR)
# the other file ending in xxx_FDRsig.txt with a subset of sections from xxx_FDR.txt with FDR<=sig
# where sig is the threshold chosen as the maximum FDR allowed for significance.
# ARGUMENTS
# 4. file Name of dataset/file
# 5. Parameter (B0, B1, etc)
# 6. phenotype (lumA, test, sim, rec, etc)
# 7. Parts (in which the directory was split (TOTAL))
# 8. perm: number of permutations used to modify 0 p-values (e.g. 1/10,000=0.0001, pvalue[0]=0.0001)
# 9. sig: desired false discovery rate (fdr) for the significant sections
# 10. subdir a directory within /file to send the results and read dictionaries, sig_pcalc files
# note: data should be within /file directory
# EXAMPLE
# R --slave --args set B0 test 2 10000 0.05< 6_FDR.R
# R --vanilla --args bergamaschiMADMA3_sect B0 Luminal_A 7 10000 0.05< 6_FDR.R
# R --vanilla --args bergamaschiMADMA3_sect B0 Luminal_A 7 10000 0.05 subdir< 6_FDR.R
# Get the command line arguments
args = commandArgs();
file <- args[4];
param <- args[5];
phen <- args[6];
parts <- args[7];
perm <- as.numeric(args[8]);
sig <- as.numeric(args[9]);
subdir <- args[10];
# Only for debugging purposes
#file <- "horlings";
#param <- "B0";
#phen <- "Hist_grade3";
#parts <- 8;
#perm <- 10000;
#sig <- 0.05;
#subdir <- "sect";
# Working directory
#begPath <- "~/Research";
begPath <- "..";
######## CREATING A SINGLE P-VALUE FILE FROM THE PARTS ###########
# Read the first file
begName <- paste(param, phen, file, subdir, "pvals", sep="_");
first <- paste(begName, "1.txt", sep="_");
Path <- paste(begPath, "Results", file, subdir, "significance", "pvals", param, phen, sep="/");
filePath <- paste(Path, first, sep="/");
print(filePath);
all_fdr <- read.table(filePath, header=TRUE, sep="\t");
all_fdr<-all_fdr[order(all_fdr$Chr, all_fdr$Arm,all_fdr$Segment), ];
# Paste together all the files
if (parts>1) {
for (i in c(2:parts)) {
tempPath <- paste(Path, "/", begName, "_", i, ".txt", sep="");
print(tempPath);
tempFile <- read.table(tempPath, header=TRUE, sep="\t");
tempFile<-tempFile[order(tempFile$Chr, tempFile$Arm,tempFile$Segment), ];
all_fdr <- rbind(all_fdr, tempFile);
rm(tempFile);
}
}
######## ADJUSTING FOR MULTIPLE TESTING WITH FDR ########
# Avoiding 0 p-value before adjusting with FDR
noZero<-1/perm;
all_fdr$P.Value<-ifelse(all_fdr$P.Value==0, noZero, all_fdr$P.Value);
# Adjust for multiple testing with FDR
all_fdr$fdr<-round(p.adjust(all_fdr$P.Value,"fdr"), digits=4);
# Formating output for Test.Stat and P.Value
all_fdr$Test.Stat<-round(all_fdr$Test.Stat, digits=2);
all_fdr$P.Value<-round(all_fdr$P.Value, digits=4);
#Saving the complete fdr adjusted p-values into file
all_fdrName <- paste(begName, "FDR", sep="_");
all_fdrPath <- paste(Path, "/", all_fdrName,".txt", sep="");
print(all_fdrPath);
write.table(all_fdr,all_fdrPath, sep='\t', row.names=FALSE);
######## SELECTING AND SAVING SIGNIFICANT SECTIONS ###########
sig_fdr <- subset(all_fdr, all_fdr$fdr<=sig);
# Saving into file significant FDR sections
sig_fdrName <- paste(all_fdrName,"sig.txt", sep="");
sig_fdrPath <- paste(Path, sig_fdrName, sep="/");
print(sig_fdrPath);
write.table(sig_fdr,sig_fdrPath, sep='\t', row.names=FALSE)