#=
Functions used in scripts
=#
#################
# Preprocessing #
#################
#Replace NaN values with 0 (in place)
function nan_to_0(s)
    for j in eachindex(s)
        if isnan(s[j])
            s[j] = 0
        end
    end
end
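#=
Minimal usage sketch (hypothetical data, not part of the pipeline):
    A = [1.0 NaN; NaN 4.0]
    nan_to_0(A) #A is mutated in place to [1.0 0.0; 0.0 4.0]
=#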
#####################
# Creating datasets #
#####################
#=
Create Training and Testing datasets
=#
#Extract `number_of_samples` random Stride x Stride sample windows from the
#resistance, origin, and connectivity layers
begin
    #trim both dimensions so every sampled window fits inside the layer
    bounds_connect = Connectivity[1:size(Connectivity,1)-Stride, 1:size(Connectivity,2)-Stride]
    presence_data = findall(x -> x > 0, bounds_connect)
    #sample points for maps and connect (training)
    Random.seed!(1234)
    samp_pts = sample(presence_data, number_of_samples)
    get_train_samp1 = Int[]
    get_train_samp2 = Int[]
    for i in 1:length(samp_pts)
        y = samp_pts[i][1]
        x = samp_pts[i][2]
        push!(get_train_samp1, x)
        push!(get_train_samp2, y)
    end
    #sample points for test_maps and test_connect (testing)
    Random.seed!(5678)
    samp_pts2 = sample(presence_data, number_of_samples)
    get_train_samp3 = Int[]
    get_train_samp4 = Int[]
    for i in 1:length(samp_pts2)
        y = samp_pts2[i][1]
        x = samp_pts2[i][2]
        push!(get_train_samp3, x)
        push!(get_train_samp4, y)
    end
end
function make_datasets(Resistance, Origin, Connectivity)
    #create Training dataset from the Cartesian product of the sampled rows and columns
    maps = []
    connect = []
    for i in get_train_samp2, j in get_train_samp1
        #take windows of dimensions Stride x Stride
        x_res = Resistance[i:(i+Stride-1), j:(j+Stride-1)]
        x_or = Origin[i:(i+Stride-1), j:(j+Stride-1)]
        x = cat(x_res, x_or, dims=3) #concatenate resistance and origin layers
        y = Connectivity[i:(i+Stride-1), j:(j+Stride-1)] #matrix we want to predict
        # if minimum(y) > 0 #predict only when there is connectivity
        push!(maps, x)
        push!(connect, y)
        # end
    end
    #create Testing dataset
    test_maps = []
    test_connect = []
    for i in get_train_samp4, j in get_train_samp3
        x_res = Resistance[i:(i+Stride-1), j:(j+Stride-1)]
        x_or = Origin[i:(i+Stride-1), j:(j+Stride-1)]
        x = cat(x_res, x_or, dims=3)
        y = Connectivity[i:(i+Stride-1), j:(j+Stride-1)]
        #if minimum(y) > 0
        push!(test_maps, x)
        push!(test_connect, y)
        #end
    end
    return maps, connect, test_maps, test_connect
end
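#=
Usage sketch: assuming `Resistance`, `Origin`, and `Connectivity` are equally
sized matrices loaded elsewhere, and the sample-point vectors and `Stride`
global above are defined:
    maps, connect, test_maps, test_connect = make_datasets(Resistance, Origin, Connectivity)
    size(maps[1])    #(Stride, Stride, 2) -- resistance and origin channels
    size(connect[1]) #(Stride, Stride)
=#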
#=
Push samples from multiple species into maps_multisp, connect_multisp,
test_multisp, and test_maps_connect_multisp
=#
function samp_multi_sp(sp_res, sp_or, sp_con)
    #build the datasets once instead of four times
    sp_maps, sp_connect, sp_test_maps, sp_test_connect = make_datasets(sp_res, sp_or, sp_con)
    push!(maps_multisp, sp_maps)
    push!(connect_multisp, sp_connect)
    push!(test_multisp, sp_test_maps)
    push!(test_maps_connect_multisp, sp_test_connect)
end
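#=
Usage sketch: the four accumulator arrays are globals that must exist before
calling; the per-species layers (`res_sp1`, etc.) are hypothetical names:
    maps_multisp, connect_multisp = [], []
    test_multisp, test_maps_connect_multisp = [], []
    samp_multi_sp(res_sp1, or_sp1, con_sp1)
    samp_multi_sp(res_sp2, or_sp2, con_sp2)
=#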
###########################
# Visualize sample points #
###########################
function visual_samp_pts(get_training_samp, get_training_samp2)
    samp_pts = Tuple.(zip(get_training_samp, get_training_samp2))
    O_img = readpng("Origin.png")
    w = O_img.width
    h = O_img.height
    #create a drawing surface of the same size
    fname = "boxplotsamples_on_map.png" #TODO: make sure to change the filename!
    Drawing(w, h, fname)
    #place the image on the Drawing; it is positioned by its top-left corner
    placeimage(O_img, 0, 0)
    #now annotate the image; (0,0) is at the top left
    sethue("red")
    scale(0.40, 0.2905) #scale points to match the size of the basemap
    Luxor.translate(113.5, 28) #move points to fit within the basemap bounds
    setline(1) #line width of the sample boxes
    #draw the points used for the training samples
    for i in 1:length(samp_pts)
        #draw a Stride x Stride (here 9x9) rectangle at each starting point (x, y)
        rect(samp_pts[i][1], 1255 - samp_pts[i][2], 9, 9, :stroke)
    end
    finish()
    preview()
end
#=
Create Validation dataset
=#
"""
    partition_dataset(maps, connect, valid_ratio=0.1, Shuffle=true)

Create a validation set from the training set.

Args:
    maps: array of images from which the partition is made.
    connect: the connectivity targets associated with the provided images.
    valid_ratio (optional): the proportion of the data used for the validation set. Default: 0.1.
    Shuffle (optional): whether or not to shuffle the data before splitting. Default: true.

Returns:
    A tuple of 4 elements (train_maps, train_connect, valid_maps, valid_connect) where:
    train_maps: an array of images for the training set.
    train_connect: connectivity targets associated with the images in the training set.
    valid_maps: an array of images for the validation set.
    valid_connect: connectivity targets associated with the images in the validation set.
"""
function partition_dataset(maps, connect, valid_ratio=0.1, Shuffle=true)
    if Shuffle
        indices = shuffle(collect(1:size(maps, 1)))
    else
        indices = collect(1:size(maps, 1))
    end
    n_training = Int(round((1.0 - valid_ratio) * length(indices)))
    train_idx, valid_idx = indices[1:n_training], indices[n_training+1:end]
    train_maps, valid_maps = maps[train_idx], maps[valid_idx]
    train_connect, valid_connect = connect[train_idx], connect[valid_idx]
    return train_maps, train_connect, valid_maps, valid_connect
end
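#=
Usage sketch: a 90/10 train/validation split of the training data built above:
    train_maps, train_connect, valid_maps, valid_connect = partition_dataset(maps, connect, 0.1, true)
=#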
#############
# Minibatch #
#############
#create minibatches
function make_minibatch(X, Y, idxs)
    #stack the inputs into a (Stride x Stride x 2 x #batch) array
    X_batch = Array{Float32}(undef, size(X[1])..., length(idxs))
    for i in 1:length(idxs)
        X_batch[:, :, :, i] = Float32.(X[idxs[i]])
    end
    #transform each (Stride x Stride) target into a (Stride x Stride x 1 x #batch) array
    Y_batch = Array{Float32}(undef, size(Y[1])..., 1, length(idxs))
    for i in 1:length(idxs)
        Y_batch[:, :, :, i] = Float32.(Y[idxs[i]])
    end
    return (X_batch, Y_batch)
end
#=
Create train_set and validation_set
=#
function make_sets(train_maps, train_connect, valid_maps, valid_connect)
    #drop the remainder so all minibatches have the same length
    droplast = rem(length(train_maps), batch_size)
    mb_idxs = Iterators.partition(1:length(train_maps)-droplast, batch_size)
    #training set as a vector of minibatches
    train_set = [make_minibatch(train_maps, train_connect, i) for i in mb_idxs]
    droplast2 = rem(length(valid_maps), batch_size)
    mb_idxs2 = Iterators.partition(1:length(valid_maps)-droplast2, batch_size)
    #validation set batched the same way
    validation_set = [make_minibatch(valid_maps, valid_connect, i) for i in mb_idxs2]
    return train_set, validation_set
end
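#=
Usage sketch (assumes the global `batch_size` is set, e.g. `batch_size = 32`):
    train_set, validation_set = make_sets(train_maps, train_connect, valid_maps, valid_connect)
    size(train_set[1][1]) #(Stride, Stride, 2, batch_size) input batch
    size(train_set[1][2]) #(Stride, Stride, 1, batch_size) target batch
=#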
#########
# Model #
#########
#augment `x` (the input) by adding a little random noise
augment(x) = x .+ gpu(0.1f0 * randn(eltype(x), size(x)))
#return a vector of all parameters used in the model
paramvec(model) = vcat(map(p -> reshape(p, :), params(model))...)
#check whether any element is NaN
anynan(x) = any(isnan.(x))
#L2 loss between the prediction ŷ = model(x̂) and the ground truth y,
#normalized by the number of elements in the input; the input is augmented
#with Gaussian noise to make the model more robust
function loss(x, y)
    x̂ = augment(x)
    ŷ = model(x̂)
    return sum((y .- ŷ).^2) / prod(size(x))
end
#per-pixel accuracy between true and predicted values
accuracy(x, y) = 1 - Flux.mse(model(x), y) #(1 - mse) -> closer to 1 is better
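#=
The helpers above assume a global `model`. For illustration only, a minimal
Flux CNN of compatible shape (not the actual architecture used in the scripts):
    using Flux
    model = Chain(
        Conv((3, 3), 2 => 16, relu, pad=1),
        Conv((3, 3), 16 => 1, pad=1))
    x, y = train_set[1]
    loss(x, y)     #augmented L2 loss on one minibatch
    accuracy(x, y) #1 - mse, closer to 1 is better
=#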
#################################
# Run trained model on new data #
#################################
#run the trained model on each minibatch of new data
function trained_model(data9x9)
    model_on_data = [model(data9x9[i][1]) for i in 1:length(data9x9)]
    return model_on_data
end
#stitch a vector of 2D tiles into one map: hcat groups of `desired` tiles,
#then vcat the resulting strips
function stitch2d(map)
    truemap = [reduce(hcat, p) for p in Iterators.partition(map, desired)]
    truemap = [reduce(vcat, p) for p in Iterators.partition(truemap, desired)]
    return truemap
end
#stitch `desired` x `desired` grids of Stride-sized tiles (e.g. 3 x 3 tiles of 9x9 into one 27x27)
function stitch4d(model_on_9x9)
    #reduce the 4D batch arrays to a vector of 2D tiles
    mod = []
    for t in model_on_9x9
        tmp2 = [t[:, :, 1, i] for i in 1:batch_size]
        push!(mod, tmp2)
    end
    #flatten to one vector of arrays
    mod = reduce(vcat, mod)
    #hcat groups of `desired` tiles
    stitched = [reduce(hcat, p) for p in Iterators.partition(mod, desired)]
    #vcat the stitched strips, dropping the trailing incomplete group
    stitchedmap = [reduce(vcat, p) for p in Iterators.partition(stitched[1:end-1], desired)]
    return stitchedmap[1:end-1]
end
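#=
Usage sketch: run the trained model on a batched test set and reassemble the
tiles (assumes `desired` and `batch_size` are set to match the map layout):
    predictions = trained_model(validation_set) #one 4D output per minibatch
    full_maps = stitch4d(predictions)           #vector of stitched 2D maps
=#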