-
Notifications
You must be signed in to change notification settings - Fork 0
/
MW_Modelfitting(byspecies).Rmd
392 lines (295 loc) · 14.8 KB
/
MW_Modelfitting(byspecies).Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
---
title: "Analyze MeadoWatch Data"
author: "Janneke Hille Ris Lambers, Aji John, Meera Sethi, Elli Theobald"
date: "13/01/2021"
output: html_document
---
#Setup for R script and R markdown
1. Load all libraries
2. Specify string behavior
3. Load packages (should we add all packages here?)
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
options(stringsAsFactors = FALSE)
library(optimr)
library(tidyverse)
library(leaflet)
library(lubridate)
library(boot)
library(readr)
```
#Read in clean data, define wildflower community
This chunk of code reads in clean data, defines the focal species, and merges phenology data with plot-level observations for snow disappearance.
1. First, phenological functions (*HRL_phenology_functions.R*), cleaned phenology data (*PhenoSite_Clean.csv*), and snow disappearance data (*MW_SDDall.csv*) are read in. Clean data come from the data wrangling markdown (see *MW_DataWrangling.Rmd*), SDD data come from a microclimate script (*Microclimate.Rmd*), with missing snow disappearance data filled in (see *MW_SDDmodeling.Rmd*).
2. Phenology data are merged with snowmelt data.
3. Next, PhenoSite_Focal is created by subsetting. For reference:
A. Reflection Lakes species include: c("ANOC", "CAPA", "ERMO", "ERPE", "LIGR", "LUAR", "MIAL", "PEBR", "POBI", "VASI")
B. Glacier Basin species include: c("ANAR", "ARLA", "ASLE", "CAMI", "ERGR", "LICA", "LUAR", "MEPA", "PEBR", "POBI", "VASI")
++Note that LICA was mistaken for ANAR early on, so there is limited data from both species. MEPA and ANAR don't occur in meadows, and MIAL is rare and of all the species least likely to occur on both trails. Recommended species therefore:
c("ANOC", "ARLA", "ASLE", "CAMI", "CAPA", "ERMO", "ERPE", "LIGR", "LUAR", "PEBR", "POBI", "VASI")
4. From this, *nspp* (number of species) and *nyears* (number of years) are defined, which are used in for loops
```{r}
# read in maximum likelihood functions
source("./HRL_phenology_functions.R")
# Read in clean dataset; merge with SDD data
PhenoSite_0 <- read.csv("cleandata/PhenoSite_Clean.csv", header=TRUE)
SDDdat_0 <- read.csv("cleandata/MW_SDDall.csv", header=TRUE)
# Create new SDD - observed when present, predicted when not
SDDfin <- SDDdat_0$SDD
# This substitutes in predicted SDD where sensors failed = NA
SDDfin[is.na(SDDdat_0$SDD)==TRUE] <-
round(SDDdat_0$predSDD[is.na(SDDdat_0$SDD)==TRUE],0)
# this removes unwanted columns, including SDD and pred SDD
SDDdat <- subset(SDDdat_0, select=-c(Site_Num, Site_Loc, calibration,
snow_appearance_date,
snow_disappearance_date,
snow_cover_duration, minimum_soil_temp,
SDD, predSDD))
# This adds SDDfin back to data frame as SDD
SDDdat$SDD <- SDDfin
# Merge SDD and Pheno data
PhenoSite <- merge(PhenoSite_0,SDDdat,
by=c("Year", "Site_Code", "Transect"))
# Define the wildflower community: which species to include
species <- c("ANOC", "ARLA", "ASLE", "CAMI", "CAPA", "ERMO", "ERGR", "ERPE",
"LIGR", "LUAR", "PEBR", "POBI", "VASI")
# Create PhenoSite_Focal - data set with just species of interest
specieskeep <- PhenoSite$Species %in% species
PhenoSite_Focal <- PhenoSite[specieskeep,]
# Print out total number of observations
Nobs <- dim(PhenoSite_Focal)[1]
print("total observations"); print(Nobs)
# Determine how many years of data, how many species, etc
yrs <- unique(PhenoSite_Focal$Year)
nyrs <- length(yrs)
nspp <- length(species)
```
#Fit phenology model for all species data, using second model
This code fits a predictive phenology model to all observations of flowering for a particular species (function *curvefit_species*), for the same flowering community as defined above. Note that this takes a bit of time to run. The code does the following:
1. Subset data for a particular species.
2. For each species, use a for loop to assemble year-plot specific data for the species in question. This requires extracting year-plot specific data adding three no flowering observations before snowmelt (as was done for model fitting for individual year-plot-species data), and creating variables *days* (vector of DOY for all plots included), *phenophase* (vector of 1's and 0's observed), *SDD* (snow disappearance date at each plot for the observation in question).
3. Fit the maximum likelihood model (defined by *curvefit_species*) to the data (see *HRL_phenology_functions.R*).
4. Saves the parameters to *pars_spp* and the AIC values to *aics_sp*. Parameters include per-species slope and intercept (relating peak to SDD), as well as species specific range and max parameters
5. After the for loop, *pars_spp* and *aics_sp* are turned into data frames and saved as .csv files in the data folder.
```{r}
# objects in which to append output
pars_spp <- c() #save parameters
aics_sp <-c() #save AIC of overall model
# Read in per plot model fits, to use for initial parameters
pars_yrpltsp <- read.csv("output/Parameters_plotmodel.csv", header=TRUE)
# Now nested for loops to fit curves for all years, species, plots
for(i in 1:nspp){ #loop for each species, to assemble data
#extract data for that year
PhenoSite_Species <- PhenoSite_Focal[PhenoSite_Focal$Species==species[i],]
#assemble data - need to add zeros before SDD, define trail year combos
yrplt <- paste(PhenoSite_Species$Year, PhenoSite_Species$Site_Code, sep="")
yrplts <- unique(yrplt)
#now define vectors to fill
days <- c()
phenophase <- c()
SDD <- c()
#Now go through each unique year - plot combination per species
for(j in 1:length(yrplts)){
PhenoSite_YearPlotSpecies <- PhenoSite_Species[yrplt[]==yrplts[j],]
tmpdays <- PhenoSite_YearPlotSpecies$DOY #explanatory variable: DOY
tmpphenophase <- PhenoSite_YearPlotSpecies$Flower #yes / no flowers
tmpSDD <- PhenoSite_YearPlotSpecies$SDD
#remove days when no observations were made; NA in phenophase
tmpdays <- tmpdays[is.na(tmpphenophase)==FALSE]
tmpSDD <- tmpSDD[is.na(tmpphenophase)==FALSE]
tmpphenophase <- tmpphenophase[is.na(tmpphenophase)==FALSE]
#Add observations of no flowering 3 weeks prior to snowmelt
SDDplt <- min(tmpSDD)
tmpdays <- c(SDDplt-21, SDDplt-14, SDDplt-7,tmpdays)
tmpphenophase <- c(0,0,0,tmpphenophase)
tmpSDD <- c(tmpSDD[1:3],tmpSDD)
#now append to days, phenophase, SDD
days <- c(days, tmpdays)
phenophase <- c(phenophase, tmpphenophase)
SDD <- c(SDD, tmpSDD)
}
#specify initial parameters (note, optim can be fussy)
optimcoef <- c(25,1)
spdur <- median(pars_yrpltsp$duration[pars_yrpltsp$species==species[i]])
spmx <- median(pars_yrpltsp$max[pars_yrpltsp$species==species[i]])
#Fit model: first time using opm to get best results
param <- c(optimcoef, spdur, spmx) #rep(spdur, times=nyr), rep(spmx, times=nyr))
modelsnowmod1 <- opm(param, curvefit_species,
method = c("BFGS","hjn", "Nelder-Mead"),
control = list(maxit = 50000))
print(species[i]); print(modelsnowmod1)
#Now refit using Nelder Mead, using initials from previous
ind <- which(modelsnowmod1$value==min(modelsnowmod1$value))
param <- c(modelsnowmod1$p1[ind], modelsnowmod1$p2[ind],
modelsnowmod1$p3[ind], modelsnowmod1$p4[ind])
modelsnowmod <- optim(param, curvefit_species,
method = c("Nelder-Mead"),
control = list(maxit = 50000))
#check in case model didn't converge
if(modelsnowmod$convergence == 1){print(paste(species[i],"-issues",sep=""))}
#Save parameters to appropriate file
tmppars <- c(species[i],modelsnowmod$par)
pars_spp <- rbind(pars_spp,tmppars)
#save AIC
tmpaic <- c(species[i],round(2*(modelsnowmod$value+length(param)),2))
aics_sp <- rbind(aics_sp,tmpaic)
}
#make par_spp_peak, pars_spp_range, and pars_spp_max, data frames
dimnames(pars_spp) <- list(c(), c("species","peakint", "peakSDDslope",
"range","max"))
dimnames(aics_sp) <- list(c(), c("species","AIC"))
#Change to data frame
pars_spp <- data.frame(pars_spp)
aics_sp <- data.frame(aics_sp)
#change storage type
pars_spp$species <- as.factor(pars_spp$species)
aics_sp$species <- as.factor(aics_sp$species)
aics_sp$AIC <- as.numeric(aics_sp$AIC)
for(i in 2:4){pars_spp[,i] <- as.numeric(pars_spp[,i])}
head(pars_spp)
head(aics_sp)
#Write fitted parameters
write.csv(pars_spp, "output/Parameters_speciesmodel.csv", quote=FALSE,
row.names=FALSE)
write.csv(aics_sp, "output/AIC_speciesmodel.csv", quote=FALSE,
row.names=FALSE)
```
#Calculate AIC species model vs. separate parameters for all plots
Note - this shows that assuming separate parameters provides a better model fit, despite needing a lot more parameters. So we can't use model fit to justify fitting the all species model. However, looking below you can see this model does a pretty good job predicting peak flowering, which is the main thing we are interested in.
```{r}
aics_yrpltsp <- read.csv("output/AIC_plotmodel.csv", header = TRUE)
aics_yrpltsp2 <- tapply(aics_yrpltsp$AICalt, aics_yrpltsp$species, sum)
aics_yrpltsp3 <- data.frame(names(aics_yrpltsp2),aics_yrpltsp2)
colnames(aics_yrpltsp3) <- c("species","plotmodel")
rownames(aics_yrpltsp3) <- c()
aics_comp <- merge(aics_sp, aics_yrpltsp3)
dimnames(aics_comp)[[2]][2] <- "speciesmodel"
dimnames(aics_comp)[[2]][3] <- "plotmodel"
aics_comp
```
# Show plot-level peak fits vs. fitted coefficients
These graphs shows the plot level fits of peak flowering vs. SDD found at the site, and the fitted relationship from the model fit across species (black line). Shown additionally is the snowmelt date (blue dashed line) and the line describing a linear model of the plot level peak parameters vs. SDD.
```{r}
#Read in per plot model fits, so individual plot fits can be used
pars_yrpltsp <- read.csv("output/Parameters_plotmodel.csv", header=TRUE)
#extract species
#species <- pars_spp_peak$species
yrcols <- c("green","lightblue","orange","purple","pink","red","grey")
##Plot SDD vs. peak per species
for(i in 1:length(species)){
sp_pars <- pars_yrpltsp[pars_yrpltsp$species==species[i],]
yrcolind <- sp_pars$year - 2012
#Plot points showing plot SDD and plot level peak fits
par(mfrow=c(1,1), omi=c(0,0,0,0), mai=c(0.5,0.6,0.5,0.5),
tck=-0.02, mgp=c(1.1,0.5,0))
plot(sp_pars$SDD, sp_pars$peak, pty="s",
pch=21, bg=yrcols[yrcolind], cex=1.5,
xlab="Snow Disappearance Date", ylab="Peak Flowering")
title(species[i])
legend(x="topleft", legend=yrs, pt.bg=yrcols, pch=21,
cex=0.75, pt.cex=1.5)
#add line showing modeled relationship between SDD and peak
intSDD <- pars_spp$peakint[pars_spp$species==species[i]]
slSDD <- pars_spp$peakSDDslope[pars_spp$species==species[i]]
abline(intSDD, slSDD)
#add line showing linear model between SDD and fit
SDDcoef <- coef(lm(sp_pars$peak~sp_pars$SDD))
abline(SDDcoef, col="grey", lty=2, lwd=2)
#add line showing snowmelt
abline(0,1, col="lightblue", lty=3, lwd=2)
#add legend showing line types
legend(x="bottomright", legend=c("MLE","LM","snow"), lty=c(1,2,3),
lwd=c(1,1,2), col=c("black","grey","lightblue"), cex=0.75)
}
```
#Appendix F: One figure, multi-panel of the same results (for Appendix F)
```{r}
#Read in per plot model fits, so individual plot fits can be used
pars_yrpltsp <- read.csv("output/Parameters_plotmodel.csv", header=TRUE)
#set up plot
tiff(file="output/figures/FigF1.tif", width=5, height=7, units="in", res=600)
par(mfrow=c(5,3), omi=c(0,0,0,0), mai=c(0.2,0.2,0.20,0.1),
tck=-0.01, mgp=c(0.7,0.1,0), pty="s")
##Plot SDD vs. peak per species
for(i in 1:length(species)){
sp_pars <- pars_yrpltsp[pars_yrpltsp$species==species[i],]
#Plot points showing plot SDD and plot level peak fits
plot(sp_pars$SDD, sp_pars$peak, pch=21, bg="grey",
xlab="SDDDOY", ylab="Peak flower",
cex.axis=0.6, cex.lab=0.8)
title(species[i], line=0.25)
#add line showing modeled relationship between SDD and peak
intSDD <- pars_spp$peakint[pars_spp$species==species[i]]
slSDD <- pars_spp$peakSDDslope[pars_spp$species==species[i]]
abline(intSDD, slSDD)
}
dev.off()
```
#Make figures for methods supplement
+Vasi: one example plot (yes / no)
+Vasi: SDDOY vs. peak flowering
```{r}
##############
# Fig. H1 Time vs. Yes / No
sppplot <- "VASI" #pick species to plot; check 'species' for possibilities
phenoplot <- "flwr" #pick phenophase; check 'phenocats'
yearplot <- 2017
plotplot <- "RL2"
#extract raw data
SppDat <- PhenoSite_Focal[PhenoSite_Focal$Year==yearplot
& PhenoSite_Focal$Species==sppplot
& PhenoSite_Focal$Site_Code == plotplot, ]
#First SDD vs yes / no relationship
days <- SppDat$DOY
phenophase <- SppDat$Flower
days <- days[is.na(phenophase)==FALSE];
phenophase <- phenophase[is.na(phenophase)==FALSE]
param <- c(mean(days),-0.001, 0)
model1 <- optim(param, curvefit_perplot, control = list(maxit = 20000))
#Model prediction (for line)
time <- seq(180,285, by=0.25)
peakp <- model1$par[1]
rangep <- model1$par[2]
maxp <- model1$par[3]
probf <- inv.logit(rangep * (time - peakp)^2 + maxp)
#Now plot
tiff(file="output/figures/FigH1.tif",
width=4, height=3.75, units="in", res=600)
par(mfrow=c(1,1), omi=c(0,0,0,0), mai=c(0.5,0.5,0.5,0.5), mgp=c(1.1,0.3,0),
tck=-0.02, xpd="TRUE")
plot(time, probf, ylim=c(0,1), xlim=c(180,285),
type="l", col="grey", lwd=2, xlab="", ylab="Flowers Observed",
xaxp=c(180,240,2), xaxt="n", yaxt="n")
points(SppDat$DOY, SppDat$Flower, pch=21, bg="grey", cex=1.5)
text(c(195,225,255,285),-0.075,labels=c("June","July","Aug","Sept"))
text(172.5,c(0.025,0.975),labels=c("No","Yes"), srt=90)
dev.off
##############
# Fig. H2 SDDOY vs. peak flowering fits
#Read in per plot model fits, so individual plot fits can be used
pars_yrpltsp <- read.csv("output/Parameters_plotmodel.csv", header=TRUE)
#extract species
sp_pars <- pars_yrpltsp[pars_yrpltsp$species=="VASI",]
#Plot points showing plot SDD and plot level peak fits
tiff(file="output/figures/FigH2.tif",
width=5, height=5, units="in", res=600)
par(mfrow=c(1,1), omi=c(0,0,0,0), mai=c(0.5,0.6,0.5,0.5),
tck=-0.02, mgp=c(1.1,0.5,0), xpd="NA")
#for line showing modeled relationship between SDD and peak
intSDD <- pars_spp$peakint[pars_spp$species==species[i]]
slSDD <- pars_spp$peakSDDslope[pars_spp$species==species[i]]
xs <- c(90,210)
ys <- intSDD + slSDD*xs
#Now plot
plot(xs,ys,type="l", bg="grey", lwd=2,
cex.lab=1.25, xlim=c(90,210),ylim=c(150,240),
xaxp=c(90,210,4), yaxp=c(150,240,3), xaxt="n",yaxt="n",
xlab="Date of Snowmelt", ylab="Peak Flowering")
#add points
points(sp_pars$SDD, sp_pars$peak,pch=21, bg="grey", cex=1.5)
#add text for months, name of species
text(c(105,135,165,195),143.5,labels=c("April","May","June","July"))
text(82.5, c(165,195,225),labels=c("June","July","August"),srt=90)
title("Sitka Valerian", line=-1, adj=0.05)
dev.off()
```