-
Notifications
You must be signed in to change notification settings - Fork 0
/
1.2_evalWDPA.r
296 lines (232 loc) · 16.5 KB
/
1.2_evalWDPA.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# evaluate protected areas and networks against shifts in species distribution
# calculate species gains, losses, turnover, etc.
# also output table of species p(occur) by MPA in the current time-period (2007-2020)
############
## Flags
############
# choose the rcp(s)
RCPS <- c(26, 85)
# select initial and final timeperiod for these grids
PERIODS <- c('2007-2020', '2081-2100')
# climate models in the projections
#bcc-csm1-1-m, bcc-csm1-1, CanESM2, CCSM4, CESM1-CAM5, CNRM-CM5, GFDL-CM3, GFDL-ESM2M, GFDL-ESM2G, GISS-E2-R, GISS-E2-H, HadGEM2-ES, IPSL-CM5A-LR, IPSL-CM5A-MR, MIROC-ESM, MIROC5, MPI-ESM-LR, NorESM1-ME
NMODS <- 18 # for the pres/abs models. Models 12 (HadGEM2-ES) and 16 (MIROC5) are extraneous since not in the biomass projections used in rest of project
MODSPRESTODROP <- c(12,16)
# path to the pres-abs projection results from Morley et al. 2018. At 0.05 grid size.
PRESABSPATH <- '/local/shared/pinsky_lab/projections_PlosOne2018/CEmodels_proj_PresAbs_May2018'
####################
## helper functions
####################
require(data.table)
# set default rounding (2 bytes) so that data.table will merge numeric values appropriately
# see https://rdrr.io/rforge/data.table/man/setNumericRounding.html
setNumericRounding(2)
#require(lme4) # for mixed-effects models
#require(car) # for testing ME models
#require(Hmisc)
###########################
## Load and set up WDPA data
###########################
wdpagrid <- fread('gunzip -c output/wdpa_cov_by_grid0.05.csv.gz', drop = 1) # shows which MPAs are in which grid cells. each line is a unique grid cell-MPA combination.
# convert lon to -360 to 0 (instead of -180 to 180) to match the pres/abs projections and to plot more nicely
wdpagrid[lon>0, lon := lon-360]
# set up MPA networks
wdpagrid[SUB_LOC == 'US-CA' & MANG_AUTH == 'State Fish and Wildlife',network := 'mlpa']
wdpagrid[grepl('US-DE|US-FL|US-GA|US-MA|US-MD|US-ME|US-NC|US-NJ|US-NY|US-RI|US-SC|US-VA', SUB_LOC) & !grepl('US-N/A', SUB_LOC) & (lon > -100), network := 'eastcoast'] # by exluding US-N/A, this won't include federal water closures
wdpagrid[(SUB_LOC %in% c('US-AK')), network := 'ak'] # by choosing by state, this won't include federal water closures
# calculate mpa extent
wdpagrid[, ':='(lat_min = min(lat), lat_max = max(lat), lon_min = min(lon), lon_max = max(lon)), by = WDPA_PID]
# wdpagrid[network=='mlpa',plot(lon,lat)]
# wdpagrid[network=='eastcoast',plot(lon,lat)]
# wdpagrid[network=='ak',plot(lon,lat)]
# require(maps); map(database='world2',add=TRUE)
##############################################################
# Load in pres/abs results for each RCP and each species
# and calculate summary statistics on species turnover by MPA
##############################################################
# the list of MPAs. Turnover data will be added as columns to this
wdpa <- wdpagrid[!duplicated(WDPA_PID), ]
wdpa[,c('gridpolyID', 'lat', 'lon', 'area_wdpa', 'area_grid', 'prop_grid', 'prop_wdpa') := NULL] # remove gridcell-specific columns
# step through each species' projections
projcols <- c('lat', 'lon', paste0('mean', 1:NMODS)) # names of columns in pred.agg to use (the projections)
for (i in 1:length(RCPS)) {
print(paste0(Sys.time(), ' On RCP ', RCPS[i], '. Species: '))
files <- list.files(path = PRESABSPATH, pattern = paste('*rcp', RCPS[i], '*', sep = ''), full.names = TRUE)
# load presmap for each model run and process the results
# then calculate change in p(occur)
for (j in 1:length(files)) {
cat(paste0(' ', j))
load(files[j]) # loads pred.agg data.frame
pred.agg <- as.data.table(pred.agg)
# round lat lon to nearest 0.05 so that merging is successful with WDPA
pred.agg[, lat := floor(latitude*20)/20 + 0.025] # assumes 0.05 grid size
pred.agg[, lon := floor(longitude*20)/20 + 0.025] # assumes 0.05 grid size
setkey(pred.agg, lat, lon)
# reorganize so that time 2 and time 1 are separate columns, rows are locations
presbyloc <- merge(pred.agg[year_range == PERIODS[1], ..projcols], pred.agg[year_range == PERIODS[2], ..projcols], by = c('lat', 'lon'), suffixes = c('.t1', '.t2'))
# merge pres/abs data with WDPA grid data and calculate summary stats about species change through time
# summary stat column names are of form [sumstatname].[RCP].[modnumber]
wdpagridspp <- merge(wdpagrid, presbyloc, all.x = TRUE, by = c('lat', 'lon'))
for (m in 1:NMODS) { # for each climate model
if(!(m %in% MODSPRESTODROP)){ # drop two of the pres-abs models
if (m < 12) thismodnum <- m
if (m > 12 & m < 16) thismodnum <- m - 1 # adjust model numbers to match biomass models
if (m > 16) thismodnum <- m - 2
wdpagridspp[is.na(wdpagridspp[[paste0('mean', m, '.t1')]]), (paste0('mean', m, '.t1')) := 0] # set NAs to 0 (species not present)
wdpagridspp[is.na(wdpagridspp[[paste0('mean', m, '.t2')]]), (paste0('mean', m, '.t2')) := 0]
# aggregate p(occur) across grid cells into full MPAs
sppbyMPA <- wdpagridspp[, .(pinit = 1 - prod(1 - get(paste0('mean', m, '.t1'))*prop_wdpa), # p(occur whole MPA) at initial timepoint as 1 - prod(1-p(occur per grid))
pfinal = 1 - prod(1 - get(paste0('mean', m, '.t2'))*prop_wdpa)
), by = WDPA_PID]
#print(dim(sppbyMPA))
# merge wpda with sppbyMPA (latter has results from the current species)
wdpa <- merge(wdpa, sppbyMPA, by = 'WDPA_PID')
# calculate summary stats by MPA and add this species onto results from previous species
if (!(paste0('ninit.', RCPS[i], '.', thismodnum) %in% colnames(wdpa))) { # if output column in wdpa doesn't yet exist, create it
wdpa[, (paste0('ninit.', RCPS[i], '.', thismodnum)) := pinit]
wdpa[, (paste0('nfinal.', RCPS[i], '.', thismodnum)) := pfinal]
wdpa[, (paste0('nshared.', RCPS[i], '.', thismodnum)) := pinit*pfinal]
wdpa[pinit > pfinal, (paste0('nlost.', RCPS[i], '.', thismodnum)) := pinit - pfinal]
wdpa[pinit <= pfinal, (paste0('nlost.', RCPS[i], '.', thismodnum)) := 0]
wdpa[pinit < pfinal, (paste0('ngained.', RCPS[i], '.', thismodnum)) := pfinal - pinit]
wdpa[pinit >= pfinal, (paste0('ngained.', RCPS[i], '.', thismodnum)) := 0]
} else {# if columns already exist, add the new species' values onto the existing values
wdpa[, (paste0('ninit.', RCPS[i], '.', thismodnum)) := get(paste0('ninit.', RCPS[i], '.', thismodnum)) + pinit]
wdpa[, (paste0('nfinal.', RCPS[i], '.', thismodnum)) := get(paste0('nfinal.', RCPS[i], '.', thismodnum)) + pfinal]
wdpa[, (paste0('nshared.', RCPS[i], '.', thismodnum)) := get(paste0('nshared.', RCPS[i], '.', thismodnum)) + pinit*pfinal]
wdpa[pinit > pfinal, (paste0('nlost.', RCPS[i], '.', thismodnum)) := get(paste0('nlost.', RCPS[i], '.', thismodnum)) + pinit - pfinal]
wdpa[pinit < pfinal, (paste0('ngained.', RCPS[i], '.', thismodnum)) := get(paste0('ngained.', RCPS[i], '.', thismodnum)) + pfinal - pinit]
}
wdpa[, c('pinit', 'pfinal') := NULL] # delete the species-specific columns now that we have the summaries we need
}
}
# wdpa[,print(max(get(paste0('ninit.', RCPS[i], '.1'))))] # a test that cumulative sums are being calculated. Should increase through the loop
}
}
print(Sys.time())
dim(wdpa) # 923 x 127
# how many rows are zero?
wdpa[, .(sum(ninit.26.1 == 0), sum(ninit.85.1 == 0))]
wdpa[, .(sum(ninit.26.1 > 0), sum(ninit.85.1 > 0))]
# wdpa[ninit.26.1 > 0, plot(lon_max, lat_max, cex = 0.1, xlim=c(-190, 0))]
# wdpa[ninit.26.1 == 0, points(lon_max, lat_max, cex = 0.5, col = 'red')]
# write out species data
write.csv(wdpa, file = gzfile('temp/wdpaturnbyMPAbymod.csv.gz'))
# wdpa <- fread('gunzip -c temp/wdpaturnbyMPAbymod.csv.gz', drop = 1) # don't read the row numbers
################################################################################
# Load in pres/abs results for each RCP and each species
# and aggregate p(occur) for each species in each MPA for current time-period
# for comparison against observations
################################################################################
projcols <- c('lat', 'lon', paste0('mean', 1:NMODS)) # names of columns in pred.agg to use (the projections)
for (i in 1:length(RCPS)) {
print(paste0(Sys.time(), ' On RCP ', RCPS[i], '. Species: '))
files <- list.files(path = PRESABSPATH, pattern = paste('*rcp', RCPS[i], '*', sep = ''), full.names = TRUE)
# load presmap for each model run and process the results
for (j in 1:length(files)) {
cat(paste0(' ', j))
load(files[j]) # loads pred.agg data.frame
thisspp <- gsub(paste0(PRESABSPATH, '/|_Atl|_Pac|_rcp26|_rcp85|_jas_PresAbs_projection_AGG.RData'), '', files[j])
pred.agg <- as.data.table(pred.agg)
# round lat lon to nearest 0.05 so that merging is successful with WDPA
pred.agg[, lat := floor(latitude*20)/20 + 0.025] # assumes 0.05 grid size
pred.agg[, lon := floor(longitude*20)/20 + 0.025] # assumes 0.05 grid size
setkey(pred.agg, lat, lon)
# trim to 2007-2020
pred.agg <- pred.agg[year_range == '2007-2020',]
# merge pres/abs data with WDPA grid data
wdpagridspp <- merge(wdpagrid, pred.agg, by = c('lat', 'lon')) # don't include all MPAs, to drop MPAs for which this species wasn't projected (e.g., Atl vs Pac)
# calculate summary stats about species p(occur) by MPA for each climate model
for (m in 1:NMODS) { # for each climate model
if(!(m %in% MODSPRESTODROP)){ # drop two of the pres-abs models
if (m < 12) thismodnum <- m
if (m > 12 & m < 16) thismodnum <- m - 1 # adjust model numbers to match biomass models
if (m > 16) thismodnum <- m - 2
# aggregate p(occur) across grid cells into full MPAs
sppbyMPA <- wdpagridspp[, .(spp = thisspp, rcp = RCPS[i], model = thismodnum, poccur = 1 - prod(1 - get(paste0('mean', m))*prop_wdpa)), by = WDPA_PID] # p(occur whole MPA) at initial timepoint as 1 - prod(1-p(occur per grid))
#print(dim(sppbyMPA))
# add this species/model/rcp onto the output data.table
if(!exists('wdpa_by_spp_projnow')) { # create the output table if it doesn't exist
wdpa_by_spp_projnow <- sppbyMPA
} else {
wdpa_by_spp_projnow <- rbind(wdpa_by_spp_projnow, sppbyMPA)
}
}
}
}
}
print(Sys.time())
dim(wdpa_by_spp_projnow) #
# write out species data
write.csv(wdpa_by_spp_projnow, file = gzfile('temp/wdpa_by_spp_projnow.csv.gz'))
##############################################
# Calculate turnover within select MPA networks
# For each model in the ensemble
##############################################
wdpanet <- wdpagrid[!is.na(network), .(lat = mean(lat), lon = mean(lon), area = sum(area_fullwdpa)), by = network ] # the list of MPA networks. Turnover data will be added as columns to this
projcols <- c('lat', 'lon', paste0('mean', 1:NMODS)) # names of columns in pred.agg to use (the projections)
for (i in 1:length(RCPS)) {
print(paste0(Sys.time(), ' On RCP ', RCPS[i], '. Species: '))
files <- list.files(path = PRESABSPATH, pattern = paste('*rcp', RCPS[i], '*', sep = ''), full.names = TRUE)
# load presmap for each model run and process the results
# then calculate change in p(occur)
for (j in 1:length(files)) {
cat(paste0(' ', j))
load(files[j]) # loads pred.agg data.frame
pred.agg <- as.data.table(pred.agg)
# round lat lon to nearest 0.05 so that merging is successful with WDPA
pred.agg[, lat := floor(latitude*20)/20 + 0.025] # assumes 0.05 grid size
pred.agg[, lon := floor(longitude*20)/20 + 0.025] # assumes 0.05 grid size
setkey(pred.agg, lat, lon)
# reorganize so that time 2 and time 1 are separate columns, rows are locations
presbyloc <- merge(pred.agg[year_range == PERIODS[1], ..projcols], pred.agg[year_range == PERIODS[2], ..projcols], by = c('lat', 'lon'), suffixes = c('.t1', '.t2'))
# merge pres/abs data with WDPA grid data and calculate summary stats about species change through time
# summary stat column names are of form [sumstatname].[RCP].[modnumber]
wdpagridspp <- merge(wdpagrid, presbyloc, all.x = TRUE, by = c('lat', 'lon'))
for (m in 1:NMODS) { # for each climate model
if (!(m %in% MODSPRESTODROP)) { # drop two of the pres-abs models
if (m < 12) thismodnum <- m
if (m > 12 & m < 16) thismodnum <- m - 1 # adjust model numbers to match biomass models
if (m > 16) thismodnum <- m - 2
wdpagridspp[is.na(wdpagridspp[[paste0('mean', m, '.t1')]]), (paste0('mean', m, '.t1')) := 0] # set NAs to 0 (species not present)
wdpagridspp[is.na(wdpagridspp[[paste0('mean', m, '.t2')]]), (paste0('mean', m, '.t2')) := 0]
# aggregate p(occur) across grid cells into full MPA networks
sppbynet <- wdpagridspp[!is.na(network), .(pinit = 1 - prod(1 - get(paste0('mean', m, '.t1'))*prop_wdpa), # p(occur whole MPA) at initial timepoint as 1 - prod(1-p(occur per grid))
pfinal = 1 - prod(1 - get(paste0('mean', m, '.t2'))*prop_wdpa)
), by = network]
#print(dim(sppbynet))
# merge wpda with sppbynet (latter has results from the current species)
wdpanet <- merge(wdpanet, sppbynet, by = 'network')
# calculate summary stats and add this species onto results from previous species
if (!(paste0('ninit.', RCPS[i], '.', thismodnum) %in% colnames(wdpanet))) { # if output column in wdpanet doesn't yet exist, create it
wdpanet[, (paste0('ninit.', RCPS[i], '.', thismodnum)) := pinit]
wdpanet[, (paste0('nfinal.', RCPS[i], '.', thismodnum)) := pfinal]
wdpanet[, (paste0('nshared.', RCPS[i], '.', thismodnum)) := pinit*pfinal]
wdpanet[pinit > pfinal, (paste0('nlost.', RCPS[i], '.', thismodnum)) := pinit - pfinal]
wdpanet[pinit <= pfinal, (paste0('nlost.', RCPS[i], '.', thismodnum)) := 0]
wdpanet[pinit < pfinal, (paste0('ngained.', RCPS[i], '.', thismodnum)) := pfinal - pinit]
wdpanet[pinit >= pfinal, (paste0('ngained.', RCPS[i], '.', thismodnum)) := 0]
} else {# if columns already exist, add the new species' values onto the existing values
wdpanet[, (paste0('ninit.', RCPS[i], '.', thismodnum)) := get(paste0('ninit.', RCPS[i], '.', thismodnum)) + pinit]
wdpanet[, (paste0('nfinal.', RCPS[i], '.', thismodnum)) := get(paste0('nfinal.', RCPS[i], '.', thismodnum)) + pfinal]
wdpanet[, (paste0('nshared.', RCPS[i], '.', thismodnum)) := get(paste0('nshared.', RCPS[i], '.', thismodnum)) + pinit*pfinal]
wdpanet[pinit > pfinal, (paste0('nlost.', RCPS[i], '.', thismodnum)) := get(paste0('nlost.', RCPS[i], '.', thismodnum)) + pinit - pfinal]
wdpanet[pinit < pfinal, (paste0('ngained.', RCPS[i], '.', thismodnum)) := get(paste0('ngained.', RCPS[i], '.', thismodnum)) + pfinal - pinit]
}
wdpanet[, c('pinit', 'pfinal') := NULL] # delete the species-specific columns now that we have the summaries we need
}
}
# wdpanet[,print(max(get(paste0('ninit.', RCPS[i], '.1'))))] # a test that cumulative sums are being calculated. Should increase through the loop
}
cat('\n')
}
print(Sys.time())
dim(wdpanet) # 923 x 127
# how many rows are zero?
wdpanet[, .(sum(ninit.26.1 == 0), sum(ninit.85.1 == 0))]
wdpanet[, .(sum(ninit.26.1 > 0), sum(ninit.85.1 > 0))]
# wdpanet[ninit.26.1 > 0, plot(lon_max, lat_max, cex = 0.1, xlim=c(-190, 0))]
# wdpanet[ninit.26.1 == 0, points(lon_max, lat_max, cex = 0.5, col = 'red')]
# write out species data
write.csv(wdpanet, file = gzfile('temp/wdpaturnbynetbymod.csv.gz'), row.names = FALSE)
# wdpanet <- fread('gunzip -c temp/wdpaturnbynetbymod.csv.gz') # don't read the row numbers