-
Notifications
You must be signed in to change notification settings - Fork 0
/
HRL_microclim_functions.R
executable file
·425 lines (358 loc) · 17.3 KB
/
HRL_microclim_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
##Functions for formatting and processing microclimate data.
##Author: Ian Breckheimer (Modified by Meera Lee Sethi for MW/WTA analysis)
##Last worked on: November 2020
#### Function to format an individual HOBO or ibutton file and extract metadata. ####
## Reads one raw microclimate logger .csv (HOBO or iButton, several known layouts),
## normalizes it to numeric columns YEAR/MONTH/DAY/HOUR/MIN/TEMP, and returns the
## data together with file-level metadata (date range, logging interval,
## temperature extremes, detected sensor type, and time zone).
## csv_name: name of a .csv file in the current working directory.
## Returns: a list with elements data, filename, filepath, n_measurements,
##   log_interval, date_min, date_max, date_lab, temp_min, temp_max,
##   hobo, ibutton, formatted, datetime, tz.
format_micro_csv <- function(csv_name) {
  ## Checks to make sure that the file exists in the current working directory.
  stopifnot(file.exists(csv_name))
  print(paste("Now processing file ", csv_name))
  ## Reads in the beginning of the file to check the formatting.
  header <- scan(csv_name, nlines = 30, what = 'raw', quiet = TRUE)
  ## Known already-formatted header variants (case-insensitive, with stray commas).
  is.formatted <- tolower(header[1]) %in% c("year,month,day,hour,temp,light",
                                            "year,month,day,hour,temperature",
                                            "year,month,day,hour,temp",
                                            "year,month,day,hour,temp,",
                                            "year,month,day,hour,temp,,",
                                            "year,month,day,hour,temp,,,",
                                            "year,month,day,hour,temp,,,,",
                                            "year,month,day ,hour,temp",
                                            "year,month,day,hour,value",
                                            "year,month,day,hour,temp,light,,",
                                            "year,month,day,hour,temp,light,,,",
                                            "year,month,day,hour,temp,light,,,,",
                                            "year,month,day,hour,temp,light,,,,,,,",
                                            "year,month,day,hour,temp,intensity",
                                            "year,month,day,hour,temp,temp")
  is.datetime <- unlist(strsplit(header[1], split = ","))[1] == "Date/Time"
  is.ibutton <- any(grepl("iButton", header))  # iButton exports mention the brand in the preamble
  is.hobo <- any(grepl("Plot", header)) && any(grepl("Time", header)) && any(grepl("Title", header))
  if (is.formatted) {
    ## Already-formatted file: rebuild a DateTime string from the Y/M/D/H columns.
    file <- read.table(csv_name, skip = 1, sep = ",")
    DateTime <- paste(file[, 2], "/", file[, 3], "/", file[, 1], " ", file[, 4], ":00", sep = "")
    file <- cbind(DateTime, file[-c(1:4)], NA)
  } else if (is.datetime) {
    ## First column is a ready-made "Date/Time" string; second column is temperature.
    table <- read.table(csv_name, skip = 1, header = FALSE, sep = ",")
    DateTime <- strptime(table[, 1], format = "%m/%d/%y %H:%M")
    temp <- as.vector(table[, 2])
    file <- data.frame(DateTime, temp, light = NA)
  } else if (is.ibutton) {
    ## Each iButton model writes a different-length preamble before the data rows.
    if (any(grepl("DS1921G", header))) {
      file <- read.table(csv_name, skip = 15, header = FALSE, sep = ",")
    }
    if (any(grepl("DS1922L", header))) {
      file <- read.table(csv_name, skip = 22, header = FALSE, sep = ",")
    }
    if (any(grepl("DS2422", header))) {
      ## iButton model DS2422.
      file <- read.table(csv_name, skip = 20, header = FALSE, sep = ",")
    }
    if (any(grepl("DS1923", header))) {
      file <- read.table(csv_name, skip = 20, header = FALSE, sep = ",")
    }
    ## Fail fast with a clear message if none of the known models matched
    ## (otherwise the code below would error with "object 'file' not found").
    if (!exists("file", inherits = FALSE)) {
      stop(paste("Unrecognized iButton model in file", csv_name))
    }
    ## Some exports are read as a single vector rather than a table: strip the
    ## leading labels, then reshape into (date, unit, value) triples.
    if (inherits(file, "factor")) {
      if (file[1] == "Date/Time") {
        file <- file[-c(1:3)]
      }
      if (file[1] == "Unit") {
        file <- file[-c(1:2)]
      }
      if (file[1] == "Value") {
        file <- file[-1]
      }
      file <- as.character(file)
      tmp.file <- matrix(data = file, ncol = 3, nrow = length(file) / 3, byrow = TRUE)
      file <- as.data.frame(tmp.file)
      file <- file[, -2]  # remove the second column (unit)
    }
    if (dim(file)[2] == 1) {
      ## Single-column read: same triple-reshape as above.
      file <- as.character(file)
      tmp.file <- matrix(data = file, ncol = 3, nrow = length(file) / 3, byrow = TRUE)
      file <- as.data.frame(tmp.file)
      file <- file[, -2]  # remove the second column (unit)
    }
    if (dim(file)[2] >= 3) {
      file <- file[, -2]  # remove the second column (unit); any extra columns remain
    }
    ## iButtons don't record light: add an empty placeholder column.
    light.fill <- array("", dim = c(dim(file)[1], 1))
    file[, 3] <- light.fill
  } # end ibutton-specific procedure
  else if (is.hobo) {
    ## HOBO export: sniff the delimiter from the last few header tokens
    ## (commas split the tokens further only in comma-delimited files).
    sample <- header[c((length(header) - 10):length(header))]
    sample_split <- unlist(strsplit(sample, split = ","))
    is.comma.del <- length(sample) != length(sample_split)
    if (is.comma.del) {
      file <- read.table(csv_name, header = FALSE, sep = ",", skip = 2, fill = TRUE)
    } else {
      file <- read.table(csv_name, header = FALSE, sep = "\t", skip = 2, fill = TRUE)
    }
    file <- file[, 2:3]  # keep only columns 2 and 3 (date/time and temperature)
  } # end HOBO-specific procedure
  else {
    ## Fail fast: the original code printed and fell through, which then crashed
    ## below with an obscure "object 'file' not found" error.
    stop(paste("Could not recognize the formatting of ", csv_name))
  }
  names(file) <- c("DateTime", "Temperature")  # name the columns
  ## Attempts to extract the time-zone from the header.
  if (any(grepl("GMT-07:00", header, fixed = TRUE)) || any(grepl("PDT", header, fixed = TRUE))) {
    tz <- "PDT"
  } else if (any(grepl("GMT-00:00", header, fixed = TRUE))) {
    tz <- "GMT"
  } else {
    tz <- "UNK"
  }
  ## Tests whether the first date carries a valid 4-digit year (2-digit otherwise).
  valid_years <- 2007:2050
  datestring <- strsplit(as.character(file$DateTime[1]), split = " ")[[1]][1]
  yeartest <- function(x) any(grepl(as.character(x), datestring))
  valid.year <- any(sapply(valid_years, FUN = yeartest))
  ## 12-hour clock if any of the first rows mention "PM".
  am.pm <- any(grepl("PM", file$DateTime[1:20]))
  ## Converts the date vector, picking the strptime format that matches the file.
  if (inherits(file$DateTime, "POSIXct")) {
    dateTime <- file$DateTime
  } else if (am.pm && valid.year) {
    dateTime <- strptime(file$DateTime, "%m/%d/%Y %r")
  } else if (!am.pm && valid.year) {
    dateTime <- strptime(file$DateTime, "%m/%d/%Y %H:%M")
  } else if (am.pm && !valid.year) {
    dateTime <- strptime(file$DateTime, "%m/%d/%y %r")
  } else {
    dateTime <- strptime(file$DateTime, "%m/%d/%y %H:%M")
  }
  ## Separate numeric columns for year, month, day, hour, and minute.
  YEAR <- as.numeric(strftime(dateTime, format = "%Y"))
  MONTH <- as.numeric(strftime(dateTime, format = "%m"))
  DAY <- as.numeric(strftime(dateTime, format = "%d"))
  HOUR <- as.numeric(strftime(dateTime, format = "%H"))
  MIN <- as.numeric(strftime(dateTime, format = "%M"))
  TEMP <- as.numeric(file$Temperature)
  file <- cbind(YEAR, MONTH, DAY, HOUR, MIN, TEMP)
  ## Extracts the range of dates in the file.
  date_min <- min(dateTime, na.rm = TRUE)
  date_max <- max(dateTime, na.rm = TRUE)
  date_lab <- paste(strftime(date_min, format = "%Y-%m"), strftime(date_max, format = "%Y-%m"), sep = "-")
  ## Measures the logging interval from the first two timestamps.
  log_int <- dateTime[2] - dateTime[1]
  ## Extracts the minimum and maximum temperature.
  temp_max <- max(TEMP, na.rm = TRUE)
  temp_min <- min(TEMP, na.rm = TRUE)
  ## Extracts the number of (non-missing) measurements.
  n_measurements <- length(na.omit(TEMP))
  file_attrib <- list(data = file,
                      filename = sub("\\.csv$", "", csv_name),
                      filepath = paste(getwd(), csv_name, sep = "/"),
                      n_measurements = n_measurements,
                      log_interval = log_int,
                      date_min = date_min,
                      date_max = date_max,
                      date_lab = date_lab,
                      temp_min = temp_min,
                      temp_max = temp_max,
                      hobo = is.hobo,
                      ibutton = is.ibutton,
                      formatted = is.formatted,
                      datetime = is.datetime,
                      tz = tz)
  return(file_attrib)
}
#### Function to batch-format mixed ibutton/HOBO microclimate files in a bunch of directories. ####
## Runs format_micro_csv() on every .csv file in each input directory, writes the
## reformatted files (prefixed and date-stamped) to output_path, and accumulates
## one metadata row per file in output_metadata_filename.
## input_paths: character vector of directories containing raw logger .csv files.
## output_path: single directory receiving formatted files plus the metadata table.
## file_prefixes: one prefix per input path, used to build output file names.
## output_metadata_filename: name of the metadata table written inside output_path.
## overwrite: if TRUE, existing formatted files in output_path are rewritten.
## Returns: the accumulated metadata as a data frame.
batch_format_micro_csv <- function(input_paths = getwd(), output_path, file_prefixes,
                                   output_metadata_filename = "metadata.txt", overwrite = FALSE) {
  ## Checks inputs.
  stopifnot(length(input_paths) == length(file_prefixes))
  stopifnot(length(output_metadata_filename) == 1)
  stopifnot(all(dir.exists(input_paths)))
  stopifnot(dir.exists(output_path))
  stopifnot(is.logical(overwrite), length(overwrite) == 1)
  ## Resolves paths up front so the setwd() calls below can't break relative
  ## inputs, and restores the caller's working directory even if a file errors.
  input_paths <- normalizePath(input_paths)
  output_path <- normalizePath(output_path)
  orig_wd <- getwd()
  on.exit(setwd(orig_wd), add = TRUE)
  ## Deletes the existing metadata file if it exists.
  meta_path <- file.path(output_path, output_metadata_filename)
  if (file.exists(meta_path)) {
    file.remove(meta_path)
  }
  ## Checks to see how many input files there are.
  ## ("\\.csv$" anchors on the real extension, so names merely containing
  ## "csv" are no longer picked up by accident.)
  csv_all <- unlist(lapply(input_paths, list.files, pattern = "\\.csv$"))
  nfiles <- length(csv_all)
  print(paste("Now processing ", nfiles, " microclimate files."))
  ## File counter across all input directories.
  file_n <- 0
  for (i in seq_along(input_paths)) {
    print(input_paths[i])
    print(getwd())
    ## format_micro_csv() resolves file names against the working directory,
    ## so we must work from inside the input folder here.
    setwd(input_paths[i])
    flush.console()
    print(paste("Now processing files in folder ", input_paths[i]))
    files <- list.files(".", pattern = "\\.csv$")
    dfs <- lapply(X = files, FUN = format_micro_csv)  # one metadata list per file
    ## Writes the modified files as .csv files to the output folder.
    setwd(output_path)
    outfiles <- length(list.files())
    if (outfiles > 0) {
      print(paste("Output folder already contains ", outfiles, "files."))
    }
    for (j in seq_along(dfs)) {
      datavals <- dfs[[j]]$data
      in_name <- dfs[[j]]$filename
      out_name <- paste0(paste(file_prefixes[i], in_name, dfs[[j]]$date_lab, sep = "_"), ".csv")
      ## Metadata row: everything format_micro_csv() returned except the data itself.
      meta <- data.frame(out_filename = out_name, dfs[[j]][-1])
      if (overwrite && file.exists(out_name)) {
        print(paste("Overwriting file", out_name))
        write.csv(datavals, file = out_name, row.names = FALSE)
      } else if (!overwrite && file.exists(out_name)) {
        print(paste("Skipping existing file", out_name))
      } else {
        print(paste("Writing file", out_name))
        write.csv(datavals, file = out_name, row.names = FALSE)
      }
      ## Writes the metadata, appending rows to an existing file if present
      ## (file.exists() avoids treating the filename as a regex).
      if (!file.exists(output_metadata_filename)) {
        write.table(meta, file = output_metadata_filename, sep = ",", row.names = FALSE, append = FALSE)
      } else {
        write.table(meta, file = output_metadata_filename, sep = ",",
                    row.names = FALSE, col.names = FALSE, append = TRUE)
      }
      file_n <- file_n + 1
      print(paste("Completed processing file ", file_n, " of ", nfiles))
    }
  }
  ## Returns the accumulated output metadata.
  meta_output <- read.csv(meta_path)
  return(meta_output)
}
#### Function to estimate snow cover duration from a series of .csv microclimate files. ####
## For every formatted sensor file in input_path, classifies each calendar day as
## snow-covered when (a) the daily temperature range stays below range_threshold
## AND (b) the daily maximum stays below max_threshold, then summarizes the snow
## appearance/disappearance dates, total snow-cover duration, and winter minimum
## soil temperature per sensor. Results are merged with the sensor metadata,
## quality-flagged, and the unflagged rows are written to output_metadata_filename
## inside output_path.
## input_path: directory containing formatted sensor .csv files plus metadata.
## input_metadata_filename: metadata table (inside input_path) from batch formatting.
## output_path / output_metadata_filename: where the filtered results are written.
## range_threshold: max daily temperature range (deg) for a snow-covered day.
## max_threshold: max daily temperature (deg) for a snow-covered day.
## overwrite: if TRUE, an existing output table may be replaced.
## Returns: the full merged data frame, including flagged rows.
batch_extract_snow_vars <- function(input_path, input_metadata_filename = "metadata.txt",
                                    output_path, output_metadata_filename,
                                    range_threshold = 1, max_threshold = 2, overwrite = FALSE) {
  ## Loads required packages (library() errors clearly if data.table is missing,
  ## unlike require(), which just returns FALSE).
  library(data.table)
  ## Checks inputs. Absolute paths are used throughout, so the working directory
  ## is never changed (the original setwd() restore sat after return() and was
  ## unreachable, leaving callers stranded in output_path).
  stopifnot(dir.exists(input_path))
  stopifnot(dir.exists(output_path))
  in_meta_path <- file.path(input_path, input_metadata_filename)
  out_meta_path <- file.path(output_path, output_metadata_filename)
  stopifnot(file.exists(in_meta_path))
  ## Refuses to clobber an existing output table unless overwrite = TRUE.
  ## (Checked against output_path, where the table is actually written; the
  ## original checked the input directory instead.)
  stopifnot(!file.exists(out_meta_path) || overwrite)
  files <- list.files(input_path, pattern = "\\.csv$")  # one file per sensor
  meta <- read.table(in_meta_path, sep = ",", header = TRUE)
  nfiles <- length(files)
  ## Preallocates one result slot per sensor file.
  calibration <- numeric(nfiles)
  stand <- character(nfiles)
  plot <- character(nfiles)
  year <- numeric(nfiles)
  snow_appearance_date <- character(nfiles)
  snow_disappearance_date <- character(nfiles)
  snow_cover_duration <- numeric(nfiles)  # in days
  minimum_soil_temp <- numeric(nfiles)
  ## Start the clock to record how long the code takes to run.
  start_t <- Sys.time()
  for (k in seq_len(nfiles)) {
    ## Prints progress.
    flush.console()
    print(paste("Now Processing file: ", files[k], "(", k, " of", nfiles, ")"))
    ## READING IN DATA FROM ONE FILE.
    d <- read.csv(file.path(input_path, files[k]))
    d <- d[complete.cases(d[, 1:5]), ]
    ## Name the columns.
    names(d)[1:6] <- c("YEAR", "MONTH", "DAY", "HOUR", "MIN", "TEMP")
    d$Date <- as.Date(paste(d$MONTH, "/", d$DAY, "/", d$YEAR, sep = ""), format = "%m/%d/%Y")
    d$DOY <- as.numeric(format(d$Date, format = "%j"))  # day of year
    d <- data.table(d, key = "Date")
    ## EXTRACT STAND INFO FROM FILENAME (expects prefix_stand_plot_... naming).
    name_parts <- strsplit(files[k], "_")[[1]]
    stand[k] <- name_parts[2]
    plot[k] <- name_parts[3]
    year[k] <- max(d$YEAR)
    ## EVALUATE SNOW COVER CRITERIA: one row per calendar day.
    days <- unique(d$Date)
    ndays <- length(days)
    daily <- data.table(date = days,
                        range = rep(NA, ndays),
                        mean = rep(NA, ndays),
                        rangethresh = rep(NA, ndays),
                        maxthresh = rep(NA, ndays),
                        snow = rep(NA, ndays))
    setkey(daily, "date")
    ## Daily temperature range, daily mean, and the two snow criteria.
    daily[[2]] <- d[, diff(range(TEMP)), by = Date][[2]]
    daily[[3]] <- d[, mean(TEMP, na.rm = TRUE), by = Date][[2]]
    daily[[4]] <- d[, diff(range(TEMP)) < range_threshold, by = Date][[2]]
    daily[[5]] <- d[, max(TEMP) < max_threshold, by = Date][[2]]
    ## DETERMINE CALIBRATION TEMPERATURE: mean temperature across all days whose
    ## range stayed under range_threshold (i.e. the sensor was likely snow-buried).
    calibration.temp <- mean(daily$mean[daily$rangethresh == TRUE])
    calibration[k] <- ifelse(!is.na(calibration.temp), yes = calibration.temp, no = 0)  # NA -> 0
    d$TEMP.calib <- d$TEMP - calibration[k]  # recalibrate data
    ## A day counts as snow-covered only when BOTH criteria hold.
    daily[[6]] <- daily[[4]] & daily[[5]]
    d.snow <- subset(daily, snow == TRUE)
    ## SUMMARIZING.
    snow_appearance_date[k] <- as.character(min(d.snow$date))     # first day snow covered the sensor
    snow_disappearance_date[k] <- as.character(max(d.snow$date))  # last day snow covered the sensor
    snow_cover_duration[k] <- sum(d.snow$snow)                    # total number of snow-covered days
    minimum_soil_temp[k] <- min(d$TEMP, na.rm = TRUE)             # winter minimum soil temperature
  }
  ## Computes elapsed time.
  stop_t <- Sys.time()
  print(paste("Run took ", stop_t - start_t))
  ## Consolidate summarized results for each sensor into one data frame.
  output <- data.frame(files,
                       year,
                       stand,
                       plot,
                       calibration,
                       snow_appearance_date,
                       snow_disappearance_date,
                       snow_cover_duration,
                       minimum_soil_temp)
  ## Match the snow cover information to the sensor metadata.
  out_merged <- unique(merge(meta, output, by.x = "out_filename", by.y = "files", all.x = TRUE))
  ## Data quality flags.
  end_doy <- as.numeric(format(as.Date(out_merged$date_max), format = "%j"))
  ## Sensor likely failed: snow still present at the end of a record that stops before June 1.
  out_merged$flag_sensor_fail <- ((as.Date(out_merged$snow_disappearance_date) - as.Date(out_merged$date_max)) >= -1) & end_doy < 152
  out_merged$flag_temp_high <- out_merged$temp_max > 90
  out_merged$flag_temp_low <- out_merged$temp_min < -30
  out_merged$flag_high_calib <- out_merged$calibration >= 2 | out_merged$calibration <= -2
  out_merged$flag_no_snow <- out_merged$snow_cover_duration <= 14
  out_merged$flag_short_record <- as.Date(out_merged$date_max) - as.Date(out_merged$date_min) < 100
  out_merged$flagged <- with(out_merged, flag_sensor_fail | flag_temp_high | flag_temp_low | flag_high_calib | flag_no_snow | flag_short_record)
  ## Keep only the unflagged sensors in the saved table.
  out_unflagged <- out_merged[out_merged$flagged == FALSE, ]
  ## Save output file.
  write.table(out_unflagged, file = out_meta_path, sep = ",", row.names = FALSE)
  return(out_merged)
}