# make_scenario_SURV_Tailcut.R
# - - - - - - - - - -- - - - - - - - - - - - - Settings - - - - - - - - - -- - - - - - - - - - - - -
# File handling
fileName = "C:/Documents and Settings/heastwood/My Documents/SACEMA/OD cut-off/hargrove-OD-22.10.2010.csv"
fileWriteLocation = "C:/Documents and Settings/heastwood/My Documents/SACEMA/OD cut-off/harg-22-10/"
SASFilePref = "SURV_var_Tail"
# algorithm variables
tailStart = 1.99 # tail in years
tailEnd = 10 # tail in years
nSamples = 2 # require at least this many samples per client
ODcut = 0.8 # BED OD cut-off for seroconversion
timeCutLower = 0 # exclude clients whose first BED test time (days) is at or below this value
timeCutUpper = 5000 # exclude clients whose first BED test time (days) is at or above this value
ExcludeAlreadySeroconverted = FALSE
ExcludeNeverSeroconverted = FALSE
# - - - - - - - - - -- - - - - - - - - - - - - End settings - - - - - - - - - -- - - - - - - - - - - - -
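# Illustrative sketch only: the expected input layout, inferred from how the code
# below indexes columns. The input is read as a space-separated table with one row
# per test and columns ID (consecutive integers starting at 1), time (days) and OD.
# The object exampleBEDH and its values are hypothetical and not part of the analysis;
# it is only used by the usage sketches further down, which are kept inside
# if (FALSE) so they never run when the script is sourced.
exampleBEDH <- data.frame(ID   = c(1, 1, 2, 2, 2),
                          time = c(30, 210, 45, 230, 420),
                          OD   = c(0.10, 0.95, 0.20, 0.35, 0.50))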
# Functions
# Build one analysis scenario from the raw BED data.
# Arguments:
#   ID.time.OD  = input data, one row per test: person ID, time of test (days), BED OD
#   cut         = OD cut-off defining seroconversion on the BED assay
#   nSamples    = minimum number of samples required per person
#   timeCutLower, timeCutUpper = keep only people whose first test time (days) lies
#                                strictly between these bounds
#   ExcludeAlreadySeroconverted = drop people whose OD is above the cut-off at every test
#   ExcludeNeverSeroconverted   = drop people whose OD never rises above the cut-off
# NB: assumes rows are ordered by person ID and IDs are consecutive integers starting at 1.
make_scenario <- function(ID.time.OD, cut, nSamples, timeCutLower, timeCutUpper, ExcludeAlreadySeroconverted, ExcludeNeverSeroconverted) {
  BEDH <- ID.time.OD
  d <- as.data.frame(BEDH)
  n <- dim(BEDH)[1] - 1
  nnu <- d$ID[n]                       # number of people (ID taken from the last-but-one row)
  omitArray <- array(dim = c(nnu, 1))  # (unused below)
  dataOut <- array(dim = c(nnu, 4))    # (unused below)
  d2 <- d                              # working copy that carries the exclusion tags
  count <- 0
  # tag people already seroconverted at the first and all subsequent measurements
  if (ExcludeAlreadySeroconverted == TRUE) {
    for (j in c(1:nnu)) {
      if (dim(d[d$ID == j & d$OD > cut, ])[1] == dim(d[d$ID == j, ])[1]) {
        d2[d2$ID == j, 2:3] <- 11111
      }
    }
  }
  # tag people who never seroconvert in any measurement
  if (ExcludeNeverSeroconverted == TRUE) {
    for (j in c(1:nnu)) {
      if (dim(d[d$ID == j & d$OD > cut, ])[1] == 0) {
        d2[d2$ID == j, 2:3] <- 22222
      }
    }
  }
  # tag people with fewer than nSamples samples
  for (j in c(1:nnu)) {
    if (dim(d[d$ID == j, ])[1] < nSamples) {
      d2[d2$ID == j, 2:3] <- 33333
    }
  }
  # tag people whose first test time falls outside (timeCutLower, timeCutUpper)
  for (j in c(1:nnu)) {
    if (d[d$ID == j, 2][1] <= timeCutLower | d[d$ID == j, 2][1] >= timeCutUpper) {
      d2[d2$ID == j, 2:3] <- 44444
    }
  }
  # keep only untagged rows
  d3 <- d2[d2$OD != 11111 & d2$OD != 22222 & d2$OD != 33333 & d2$OD != 44444, ]
  return(d3)
}
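# Illustrative usage only (a sketch, not part of the analysis): applied to the
# hypothetical exampleBEDH defined after the settings block. Person 2 never crosses
# ODcut = 0.8, so asking to exclude never-seroconverters should return only person 1's
# rows. Wrapped in if (FALSE) so that sourcing the script is unchanged.
if (FALSE) {
  make_scenario(exampleBEDH, ODcut, nSamples, timeCutLower, timeCutUpper,
                ExcludeAlreadySeroconverted = FALSE, ExcludeNeverSeroconverted = TRUE)
}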
# Number of distinct individuals (unique IDs) in a scenario data set
get_numSamples <- function(dataArrayIn) {
  return(dim(unique(dataArrayIn[1]))[1])
}
# Count people classified as "recent" by the assay, i.e. whose OD never rises above
# the cut-off. Note: because of the continue flag, a qualifying person immediately
# following a counted one is skipped.
get_recents <- function(Sdata, cut) {
  d <- as.data.frame(Sdata)
  n <- dim(d)[1] - 1
  nnu <- d$ID[n]
  count <- 0
  continue <- TRUE
  for (j in c(1:nnu)) {
    if ((dim(d[d$ID == j & d$OD > cut, ])[1] == 0) & continue == TRUE) {
      continue <- FALSE
      count <- count + 1
    } else {
      continue <- TRUE
    }
  }
  return(count)
}
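# Illustrative usage only: on the hypothetical exampleBEDH above, person 1 crosses the
# OD cut-off but person 2 never does, so get_recents should report 1 "recent" individual.
# Again wrapped in if (FALSE) so it does not run when the script is sourced.
if (FALSE) {
  get_recents(exampleBEDH, ODcut)   # expected result: 1
}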
# short label recording which seroconversion exclusions were applied (used in the output file names)
if (ExcludeAlreadySeroconverted == TRUE & ExcludeNeverSeroconverted == TRUE) sero = "+"
if (ExcludeAlreadySeroconverted == TRUE & ExcludeNeverSeroconverted == FALSE) sero = "EAST.ENSF"
if (ExcludeAlreadySeroconverted == FALSE & ExcludeNeverSeroconverted == TRUE) sero = "EASF.ENST"
if (ExcludeAlreadySeroconverted == FALSE & ExcludeNeverSeroconverted == FALSE) sero = "EASF.ENSF"
description = paste("_const_S.", nSamples, "_OD.", ODcut, "_TL.", timeCutLower, "_TU.", timeCutUpper, "_", sero, "_", sep="")
SASFile = paste(SASFilePref, description, sep="")
SASOutFileCSVname = paste(SASFilePref, description, sep="")
fileWriteLocation2 = paste(fileWriteLocation, SASFile, sep="")
fileWriteLocation3 = paste(fileWriteLocation, SASOutFileCSVname, sep="")
# the input file is space-separated despite the .csv extension
BEDH <- read.csv(file=fileName, header=TRUE, sep=" ")
# containers for the per-tail scenarios and summaries
Sdata <- list(); Strans <- list(); Sunique <- list(); N <- list()
nrecents <- array()   # number of "recent" individuals per tail
names <- array()      # tail lengths in days, filled in the loop below
tailSpan <- tailEnd - tailStart # span of tail lengths (years)
for (i in c(1:tailSpan)) {
  # the scenario itself does not depend on the tail length (hence "const" in the
  # output names); one copy is written per tail length
  Sdata[[i]] <- make_scenario(BEDH, ODcut, nSamples, timeCutLower, timeCutUpper, ExcludeAlreadySeroconverted, ExcludeNeverSeroconverted)
  Strans[[i]] <- Sdata[[i]]
  Strans[[i]][2] <- log(Sdata[[i]][2])    # log-transformed test times (not written out)
  Strans[[i]][3] <- sqrt(Sdata[[i]][3])   # square-root-transformed OD (not written out)
  Sunique[[i]] <- unique(Sdata[[i]][1])
  N[[i]] <- get_numSamples(Sdata[[i]])
  print(paste("number of samples for ", ODcut, "is ", N[[i]]))
  # write the survival file for this tail length (file name carries the tail length in days)
  names[i] <- round((tailStart + i - 1) * 365)
  write.table(Sdata[[i]], file = paste(fileWriteLocation2, "Tail.", names[i], ".csv", sep=""), sep = " ", col.names = TRUE, append = FALSE, row.names = FALSE, quote = FALSE)
  nrecents[i] <- get_recents(Sdata[[i]], ODcut)
}
# write per-tail summaries: number of samples, tail lengths (days), number of recents
write.table(N, file = paste(fileWriteLocation2, "Tail.nsamples", sep=""), sep = ",", col.names = FALSE, append = FALSE, row.names = FALSE, quote = FALSE)
write.table(names, file = paste(fileWriteLocation2, "Tail.names", sep=""), sep = ",", col.names = FALSE, append = FALSE, row.names = FALSE, quote = FALSE)
write.table(nrecents, file = paste(fileWriteLocation2, "Tail.nrecent", sep=""), sep = ",", col.names = FALSE, append = FALSE, row.names = FALSE, quote = FALSE)
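# Illustrative only: the per-tail survival files written above are plain space-separated
# tables with a header row, so a single tail can be read back as below; the exact file
# name depends on the settings at the top of the script.
if (FALSE) {
  read.table(paste(fileWriteLocation2, "Tail.", names[1], ".csv", sep=""),
             header = TRUE, sep = " ")
}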
# - - - - - - - - - -- - - - - - - - - - - - - End code - - - - - - - - - -- - - - - - - - - - - - -