-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy paththz-visibility-prediction.py
634 lines (523 loc) · 32.1 KB
/
thz-visibility-prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
# -*- coding: utf-8 -*-
"""
# NN prediction for AP visibility probability (i.e., not blocked and in coverage range)
#
"""
import sys
import glob
import time
import re
import numpy as np
import scipy as sp
import scipy.stats as st
import pandas as pd
import math
import random
import keras
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
# Experiment configuration.
# These are example values and must be consistent with the dataset.

# Seed handed to random.seed() before each training run
RANDOM_SEED = 0

# Number of BS in the simulation
NUM_BS = 121

# Number of users in the simulation (one scenario per entry)
NUM_USERS_ARRAY = [1, 2]

# For having different sets of input parameters
PROFILE_ARRAY = [4]

# Number of previous time steps considered
N_STEPS_ARRAY = [1]

# Maximum number of time slots considered in the dataset
MAX_TS_ARRAY = [100]
# Auxiliary function for computing confidence intervals
def mean_confidence_interval_v2(data, confidence=0.95):
    """Format the mean of *data* together with its confidence interval.

    The half-width of the interval is the standard error of the mean times
    the Student-t quantile for ``len(data) - 1`` degrees of freedom. When all
    samples are equal (including the single-sample case) the half-width is 0.

    Args:
        data: Non-empty sequence of numbers.
        confidence: Two-sided confidence level, 0.95 by default.

    Returns:
        The string ``'<mean> <low> <high>'`` with three decimals per value.
        The lower bound is clamped so that it is never below 0.

    Raises:
        ValueError: If *data* is empty.
    """
    samples = np.asarray(data, dtype=float)
    if samples.size == 0:
        raise ValueError("data must contain at least one sample")

    if samples.min() == samples.max():
        # Degenerate case: no spread, so the interval collapses to the mean.
        mean = samples[0]
        half_width = 0
    else:
        mean = np.mean(samples)
        # Public ppf() instead of the private _ppf() used previously.
        quantile = st.t.ppf((1 + confidence) / 2., len(samples) - 1)
        half_width = st.sem(samples) * quantile

    return '{:.3f} {:.3f} {:.3f}'.format(mean, max(mean - half_width, 0), mean + half_width)
def trim_data(data, percent=5):  # percent=5 seems reasonable
    """Return *data* sorted ascending with both tails cut off.

    ``int(len(data) * percent / 100)`` elements are dropped from the low end
    and the same number from the high end.
    """
    ordered = list(data)
    ordered.sort()
    total = len(ordered)
    cut = int(total * percent / 100)
    return ordered[cut:total - cut]
# Deprecated
def distance_seen(dist, randomness=False):
    """Return the distance as observed, or -1 when it is not observed.

    Deprecated helper. With *randomness* enabled, and unless *dist* is
    already the -1 sentinel, a uniform jitter in [-1, 1) is added and the
    result is floored at 0.001. Any distance above 10 is reported as -1.
    """
    if randomness and dist != -1:
        jitter = 2 * (random.random() - 0.5)
        dist = max(dist + jitter, 0.001)
    return -1 if dist > 10 else dist
def get_pathloss(dist, randomness = False, pathloss_threshold = 55):
    """Convert a distance into a path loss, or -1 when the AP is not usable.

    Pathloss model from https://ieeexplore.ieee.org/abstract/document/9135643
    (frequency = 0.14 THz):
    ``PL(d) = PLd0 + 10 * gamma * log10(d / d0)``, optionally plus zero-mean
    Gaussian noise with standard deviation ``sigma``.

    Args:
        dist: Distance in metres, or -1 meaning "no distance available".
        randomness: When true, add the Gaussian noise term.
        pathloss_threshold: Path loss in dB above which -1 is returned.

    Returns:
        The path loss in dB, or -1 if *dist* is -1 or the path loss exceeds
        *pathloss_threshold*.

    Raises:
        ValueError: If *dist* is zero or negative (other than the -1
            sentinel); the logarithm is undefined there.
    """
    d0 = 0.35  # m
    PLd0 = 25  # dB
    gamma = 2.117
    sigma = 0.5712

    if dist == -1:
        return -1
    if dist <= 0:
        # math.log10 would raise an opaque "math domain error" here.
        raise ValueError("dist must be positive or -1, got {}".format(dist))

    pathloss = PLd0 + 10 * gamma * math.log10(dist / d0)
    if randomness:
        # Draw a scalar directly: float() on a 1-element array is deprecated
        # in recent NumPy releases.
        pathloss += float(np.random.normal(0, sigma))
    if pathloss > pathloss_threshold:
        pathloss = -1
    return pathloss
def _other_users_features(user_ids, own_id, xs, ys, zs, rots, with_position, with_rotation):
    """Return the flattened, sorted position/rotation features of every user but *own_id*.

    One tuple is built per other user: ``(x, y, z)`` when *with_position*,
    followed by ``(sin(rot), cos(rot))`` when *with_rotation*. The tuples are
    sorted so that the feature order does not depend on user ids, then
    flattened into a single list.
    """
    rows = []
    for other_id in user_ids:
        if other_id == own_id:
            continue
        row = ()
        if with_position:
            row += (xs[other_id], ys[other_id], zs[other_id])
        if with_rotation:
            row += (math.sin(rots[other_id]), math.cos(rots[other_id]))
        rows.append(row)
    rows.sort()
    return [value for row in rows for value in row]


def load_dataset(filename, max_ts, NUM_USERS, NUM_BS, CURRENT_USER_POSITION_ENABLED,
                 PREV_TS_CURRENT_USER_POSITION_ENABLED, OTHER_USERS_POSITION_ENABLED,
                 OTHER_USERS_ROTATION_ENABLED, PREV_TS_OTHER_USERS_POSITION_ENABLED,
                 PREV_TS_OTHER_USERS_ROTATION_ENABLED, CURRENT_USER_ROTATION_ENABLED,
                 PREV_TS_CURRENT_USER_ROTATION_ENABLED, NEARBY_ENABLED,
                 NEARBY_USERS_DISTANCE_THRESHOLD, PAST_AVAILABLE_TIME_ENABLED,
                 PREV_TS_DISTANCES_INCLUDED_EXPLICITLY_ENABLED, DIRECTION_ENABLED):
    """Parse a simulation trace and build one feature row per (timestamp, user).

    Each input line is ``;``-separated: timestamp, user id, a position field
    (three comma-separated floats wrapped in one leading and one trailing
    character), rotation, and then ``NUM_BS`` distances, one per BS. Every
    distance is converted with ``get_pathloss``; a BS counts as visible when
    the result is not -1.

    Side effect: every accepted raw line is appended to the module-level list
    ``all_lines``, which must exist before this function is called.

    Args:
        filename: Path of the trace file.
        max_ts: Parsing stops at the first line whose timestamp exceeds this.
        NUM_USERS: Number of users; user ids index lists of this length.
        NUM_BS: Expected number of distance columns per line.
        CURRENT_USER_POSITION_ENABLED .. DIRECTION_ENABLED: Switches selecting
            which input features are emitted (see the inline comments for the
            order in which they are appended).
        NEARBY_USERS_DISTANCE_THRESHOLD: Radius used to count nearby users.

    Returns:
        A list of rows (lists). Each row holds the enabled input features
        followed by ``NUM_BS`` booleans, the per-BS visibility at that
        timestamp (the prediction target). Rows are ordered by timestamp and
        then by user id. Features that need a previous timestamp are omitted
        while none is available, so the earliest rows can be shorter.
    """
    # Read data from file
    print("\n Parsing input file: {}".format(filename), file=sys.stderr)

    # input_data = {ts : {id : {"pos": pos, "distances": (distances), ...}}}
    input_data = {}
    allpos_x = {}
    allpos_y = {}
    allpos_z = {}
    allrot = {}
    prev_ts = None      # last timestamp strictly before the current one
    prev_ts_tmp = None  # timestamp of the previously parsed line

    with open(filename) as in_file:
        for line in in_file:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            splitted_line = line.split(";")
            ts = float(splitted_line[0])
            if prev_ts is not None and ts <= prev_ts:
                # Non-increasing timestamp: place it one step after the
                # previous one.
                ts = prev_ts + 0.2
            if ts > max_ts:
                break
            all_lines.append(line)

            user_id = int(splitted_line[1])
            pos = tuple(float(x) for x in (splitted_line[2][1:-1]).split(","))
            assert(len(pos) == 3)
            rotation = float(splitted_line[3])
            # NOTE: despite the name these are path-loss values (or -1).
            distances_seen = tuple(get_pathloss(float(x)) for x in splitted_line[4:])
            distances = tuple(x != -1 for x in distances_seen)
            assert(len(distances) == NUM_BS)

            if prev_ts_tmp != ts:
                prev_ts = prev_ts_tmp

            # Time each BS has been continuously visible up to this timestamp.
            past_available_time = [0 for _ in range(NUM_BS)]
            for bs_id in range(NUM_BS):
                if prev_ts is not None and distances[bs_id]:
                    past_available_time[bs_id] = (
                        input_data[prev_ts][user_id]["past_available_time"][bs_id]
                        + (ts - prev_ts))

            users_at_ts = input_data.setdefault(ts, {})
            if user_id not in users_at_ts:
                users_at_ts[user_id] = {"pos": pos, "distances": distances, "rotation": rotation,
                                        "distances_seen": distances_seen,
                                        "past_available_time": past_available_time}
            for table in (allpos_x, allpos_y, allpos_z, allrot):
                if ts not in table:
                    table[ts] = [-1 for _ in range(NUM_USERS)]
            allpos_x[ts][user_id] = pos[0]
            allpos_y[ts][user_id] = pos[1]
            allpos_z[ts][user_id] = pos[2]
            allrot[ts][user_id] = rotation
            prev_ts_tmp = ts

    input_data_list = []
    prev_ts = None
    prev_prev_ts = None
    for ts in sorted(input_data.keys()):
        for user_id in sorted(input_data[ts].keys()):
            current = input_data[ts][user_id]
            has_prev = prev_ts is not None
            input_data_entry = []

            # User position
            if CURRENT_USER_POSITION_ENABLED:
                input_data_entry.extend(current["pos"][:3])
            if PREV_TS_CURRENT_USER_POSITION_ENABLED and has_prev:
                input_data_entry.extend(input_data[prev_ts][user_id]["pos"][:3])

            # Other user positions / rotations
            if OTHER_USERS_POSITION_ENABLED or OTHER_USERS_ROTATION_ENABLED:
                input_data_entry.extend(_other_users_features(
                    input_data[ts], user_id,
                    allpos_x[ts], allpos_y[ts], allpos_z[ts], allrot[ts],
                    OTHER_USERS_POSITION_ENABLED, OTHER_USERS_ROTATION_ENABLED))
            if (PREV_TS_OTHER_USERS_POSITION_ENABLED or PREV_TS_OTHER_USERS_ROTATION_ENABLED) and has_prev:
                input_data_entry.extend(_other_users_features(
                    input_data[prev_ts], user_id,
                    allpos_x[prev_ts], allpos_y[prev_ts], allpos_z[prev_ts], allrot[prev_ts],
                    PREV_TS_OTHER_USERS_POSITION_ENABLED, PREV_TS_OTHER_USERS_ROTATION_ENABLED))

            # User rotation (current, then previous timestamp)
            if CURRENT_USER_ROTATION_ENABLED:
                input_data_entry.append(math.sin(current["rotation"]))
                input_data_entry.append(math.cos(current["rotation"]))
            if PREV_TS_CURRENT_USER_ROTATION_ENABLED and has_prev:
                prev_rotation = input_data[prev_ts][user_id]["rotation"]
                input_data_entry.append(math.sin(prev_rotation))
                input_data_entry.append(math.cos(prev_rotation))

            # Nearby users
            if NEARBY_ENABLED:
                nearby_users = 0
                for id2 in input_data[ts]:
                    if id2 == user_id:
                        continue
                    distance = math.sqrt(((current["pos"][0] - allpos_x[ts][id2]) ** 2) +
                                         ((current["pos"][1] - allpos_y[ts][id2]) ** 2) +
                                         ((current["pos"][2] - allpos_z[ts][id2]) ** 2))
                    if distance <= NEARBY_USERS_DISTANCE_THRESHOLD:
                        nearby_users += 1
                input_data_entry.append(nearby_users)

            # NOTE: We cannot provide the time available from the current
            # timestamp but the previous one
            if PAST_AVAILABLE_TIME_ENABLED and has_prev:
                input_data_entry.extend(input_data[prev_ts][user_id]["past_available_time"])

            # DEBUG: Experimental for explicitly including the value of the prev ts
            if PREV_TS_DISTANCES_INCLUDED_EXPLICITLY_ENABLED and has_prev:
                input_data_entry.extend(input_data[prev_ts][user_id]["distances_seen"])

            # Per-BS change between the two previous timestamps.
            # NOTE: the -1 "not visible" sentinel is not filtered out here, so
            # a difference may mix a real value with the sentinel.
            if DIRECTION_ENABLED and has_prev and prev_prev_ts is not None:
                for bs_id in range(NUM_BS):
                    distance_prev_prev = input_data[prev_prev_ts][user_id]["distances_seen"][bs_id]
                    distance_prev = input_data[prev_ts][user_id]["distances_seen"][bs_id]
                    input_data_entry.append(distance_prev - distance_prev_prev)

            # AP visibility -> OUTPUT variable
            input_data_entry.extend(current["distances"])
            input_data_list.append(input_data_entry)
        prev_prev_ts = prev_ts
        prev_ts = ts

    print("Parsed input file", file=sys.stderr)
    return input_data_list
# convert series to supervised learning (from: https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/)
def series_to_supervised(data, n_in=1, n_out=1, diff_ts=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Each output row holds ``n_in`` lagged copies of the variables followed by
    ``n_out`` current/future copies. Consecutive lags are ``diff_ts`` rows
    apart, so a series that interleaves several users per timestamp can be
    lagged by whole timestamps.

    Args:
        data: Sequence of observations; a flat list, a list of rows or a
            2-D array (anything ``pandas.DataFrame`` accepts).
        n_in: Number of lag steps used as input.
        n_out: Number of steps used as output (the first one is ``t``).
        diff_ts: Row distance between two consecutive steps.
        dropnan: Drop the rows that contain NaN after shifting.

    Returns:
        A ``pandas.DataFrame`` with columns named ``var<j>(t-<k>)``,
        ``var<j>(t)`` and ``var<j>(t+<k>)``.
    """
    df = pd.DataFrame(data)
    # Take the column count from the frame itself: a list of rows has more
    # than one variable, and a 1-D array has no shape[1].
    n_vars = df.shape[1]
    cols, names = [], []

    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i * diff_ts))
        names.extend('var%d(t-%d)' % (j + 1, i * diff_ts) for j in range(n_vars))

    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i * diff_ts))
        if i == 0:
            names.extend('var%d(t)' % (j + 1) for j in range(n_vars))
        else:
            names.extend('var%d(t+%d)' % (j + 1, i * diff_ts) for j in range(n_vars))

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
# Main experiment: for every (max_ts, training users, steps, profile) scenario
# train a dense network that predicts per-BS visibility, then evaluate it on
# every test dataset and print/write the metrics.
for MAX_TS in MAX_TS_ARRAY:
    for NUM_USERS_TRAIN in NUM_USERS_ARRAY:
        for n_steps in N_STEPS_ARRAY:
            for PROFILE in PROFILE_ARRAY:
                FILENAME_TRAIN = "thz_datasets/OK/SimData-{}-users.csv".format(NUM_USERS_TRAIN)

                # Input-feature switches. Everything is off unless the
                # selected profile turns it on below.
                CURRENT_USER_POSITION_ENABLED = False
                OTHER_USERS_POSITION_ENABLED = False
                CURRENT_USER_ROTATION_ENABLED = False
                OTHER_USERS_ROTATION_ENABLED = False
                PREV_TS_CURRENT_USER_POSITION_ENABLED = False
                PREV_TS_OTHER_USERS_POSITION_ENABLED = False
                PREV_TS_CURRENT_USER_ROTATION_ENABLED = False
                PREV_TS_OTHER_USERS_ROTATION_ENABLED = False
                if (PROFILE == 1):
                    PREV_TS_CURRENT_USER_POSITION_ENABLED = True
                    PREV_TS_OTHER_USERS_POSITION_ENABLED = True
                    PREV_TS_CURRENT_USER_ROTATION_ENABLED = True
                    PREV_TS_OTHER_USERS_ROTATION_ENABLED = True
                elif (PROFILE == 2):
                    PREV_TS_CURRENT_USER_POSITION_ENABLED = True
                    PREV_TS_OTHER_USERS_POSITION_ENABLED = False
                    PREV_TS_CURRENT_USER_ROTATION_ENABLED = True
                    PREV_TS_OTHER_USERS_ROTATION_ENABLED = False
                elif (PROFILE == 3):
                    PREV_TS_CURRENT_USER_POSITION_ENABLED = True
                    PREV_TS_OTHER_USERS_POSITION_ENABLED = False
                    PREV_TS_CURRENT_USER_ROTATION_ENABLED = False
                    PREV_TS_OTHER_USERS_ROTATION_ENABLED = False
                elif (PROFILE == 4):
                    PREV_TS_CURRENT_USER_POSITION_ENABLED = False
                    PREV_TS_OTHER_USERS_POSITION_ENABLED = False
                    PREV_TS_CURRENT_USER_ROTATION_ENABLED = False
                    PREV_TS_OTHER_USERS_ROTATION_ENABLED = False
                NEARBY_ENABLED = False
                # This set to false means distances/signal values are not
                # considered, but just visibility or not
                PREV_TS_DISTANCES_INCLUDED_EXPLICITLY_ENABLED = False
                DIRECTION_ENABLED = False
                PAST_AVAILABLE_TIME_ENABLED = False

                # Parameters for the model
                # Distance threshold for nearby users (in meters)
                NEARBY_USERS_DISTANCE_THRESHOLD = 0.5

                # With any "other users" feature the input width depends on
                # the number of users, so such a model can only be evaluated
                # on a dataset with the same user count (checked per test
                # set below).
                OTHER_USERS_FEATURES_ENABLED = (
                    OTHER_USERS_POSITION_ENABLED or OTHER_USERS_ROTATION_ENABLED or
                    PREV_TS_OTHER_USERS_POSITION_ENABLED or PREV_TS_OTHER_USERS_ROTATION_ENABLED)
                NUM_USERS = NUM_USERS_TRAIN

                n_features = NUM_BS  # AP visibility -> output variable
                n_features += (3 if CURRENT_USER_POSITION_ENABLED else 0)
                n_features += (((NUM_USERS - 1) * 3) if OTHER_USERS_POSITION_ENABLED else 0)
                n_features += (2 if CURRENT_USER_ROTATION_ENABLED else 0)
                n_features += (((NUM_USERS - 1) * 2) if OTHER_USERS_ROTATION_ENABLED else 0)
                n_features += (3 if PREV_TS_CURRENT_USER_POSITION_ENABLED else 0)
                n_features += (((NUM_USERS - 1) * 3) if PREV_TS_OTHER_USERS_POSITION_ENABLED else 0)
                n_features += (2 if PREV_TS_CURRENT_USER_ROTATION_ENABLED else 0)
                n_features += (((NUM_USERS - 1) * 2) if PREV_TS_OTHER_USERS_ROTATION_ENABLED else 0)
                n_features += (NUM_BS if PREV_TS_DISTANCES_INCLUDED_EXPLICITLY_ENABLED else 0)
                n_features += (1 if NEARBY_ENABLED else 0)
                n_features += (NUM_BS if DIRECTION_ENABLED else 0)
                n_features += (NUM_BS if PAST_AVAILABLE_TIME_ENABLED else 0)
                n_features_predict = NUM_BS  # AP visibility

                # load_dataset() appends every parsed raw line to this list.
                all_lines = []
                random.seed(RANDOM_SEED)

                input_data_train = load_dataset(FILENAME_TRAIN, MAX_TS, NUM_USERS_TRAIN, NUM_BS, CURRENT_USER_POSITION_ENABLED,
                                                PREV_TS_CURRENT_USER_POSITION_ENABLED, OTHER_USERS_POSITION_ENABLED,
                                                OTHER_USERS_ROTATION_ENABLED, PREV_TS_OTHER_USERS_POSITION_ENABLED,
                                                PREV_TS_OTHER_USERS_ROTATION_ENABLED, CURRENT_USER_ROTATION_ENABLED,
                                                PREV_TS_CURRENT_USER_ROTATION_ENABLED, NEARBY_ENABLED,
                                                NEARBY_USERS_DISTANCE_THRESHOLD, PAST_AVAILABLE_TIME_ENABLED,
                                                PREV_TS_DISTANCES_INCLUDED_EXPLICITLY_ENABLED, DIRECTION_ENABLED)
                input_data_df = pd.DataFrame(input_data_train)
                input_data_train = input_data_df.iloc[:, :].values
                input_data_train = input_data_train.astype('float32')
                # Lag by NUM_USERS_TRAIN rows, i.e. one whole timestamp.
                reframed_train = series_to_supervised(input_data_train, n_steps, 1, NUM_USERS_TRAIN)
                input_data_train = reframed_train

                scaler_train = MinMaxScaler(feature_range=(0, 1))
                values_train = np.array(input_data_train)
                # First 70 % of the rows for training, next 10 % for validation.
                train_size = int(0.7 * len(input_data_train))
                validation_size = int(0.1 * len(input_data_train))
                train = values_train[:train_size, :]
                validation = values_train[train_size:(train_size + validation_size), :]

                # split into input and outputs (the last NUM_BS columns are the target)
                train_X, train_y = train[:, :-n_features_predict], train[:, -n_features_predict:]
                train_X = scaler_train.fit_transform(train_X)
                validation_X, validation_y = validation[:, :-n_features_predict], validation[:, -n_features_predict:]
                validation_X = scaler_train.transform(validation_X)

                # Create network
                model = Sequential()
                model.add(Dense(units=1000, input_shape=(n_steps * n_features +
                                                         (n_features - n_features_predict),), activation='relu'))
                model.add(Dense(units=1000, activation="relu"))
                model.add(Dense(n_features_predict, activation="sigmoid"))
                model.compile(loss='binary_crossentropy',
                              optimizer='sgd', metrics=['accuracy'])

                # fit network
                history = model.fit(train_X, train_y, epochs=750, batch_size=100,
                                    shuffle=False, validation_data=(validation_X, validation_y), verbose=False)

                ##########################################
                ### make a prediction for training set ###
                ##########################################
                yhat = model.predict(train_X)
                # Convert float to bool and flatten to one entry per (row, BS)
                inv_y = (train_y > 0.5).ravel()
                inv_yhat = (yhat > 0.5).ravel()

                precision = precision_score(inv_y, inv_yhat)
                print("Training precision: {:.3f}".format(precision), file=sys.stderr)
                recall = recall_score(inv_y, inv_yhat)
                print("Training recall: {:.3f}".format(recall), file=sys.stderr)
                f1 = f1_score(inv_y, inv_yhat)
                print("Training f1-score: {:.3f}".format(f1), file=sys.stderr)
                accuracy = accuracy_score(inv_y, inv_yhat)
                print("Training accuracy: {:.3f}".format(accuracy), file=sys.stderr)

                # Process the different test sets
                for NUM_USERS_TEST in NUM_USERS_ARRAY:
                    if OTHER_USERS_FEATURES_ENABLED and NUM_USERS_TEST != NUM_USERS_TRAIN:
                        # Input width would not match the trained model.
                        continue

                    FILENAME_TEST = "thz_datasets/OK/SimData-{}-users.csv".format(NUM_USERS_TEST)
                    OUTFILENAME = "thz_datasets/output-{}-users_train-{}-users_test-{}-steps-{}-profile-test.csv".format(NUM_USERS_TRAIN, NUM_USERS_TEST, n_steps, PROFILE)
                    print("\n SCENARIO: max_ts = {}, users_train = {}, users_test = {}, steps = {}, profile = {}".format(MAX_TS, NUM_USERS_TRAIN, NUM_USERS_TEST, n_steps, PROFILE), file=sys.stderr)

                    input_data_test = load_dataset(FILENAME_TEST, MAX_TS, NUM_USERS_TEST, NUM_BS, CURRENT_USER_POSITION_ENABLED,
                                                   PREV_TS_CURRENT_USER_POSITION_ENABLED, OTHER_USERS_POSITION_ENABLED,
                                                   OTHER_USERS_ROTATION_ENABLED, PREV_TS_OTHER_USERS_POSITION_ENABLED,
                                                   PREV_TS_OTHER_USERS_ROTATION_ENABLED, CURRENT_USER_ROTATION_ENABLED,
                                                   PREV_TS_CURRENT_USER_ROTATION_ENABLED, NEARBY_ENABLED,
                                                   NEARBY_USERS_DISTANCE_THRESHOLD, PAST_AVAILABLE_TIME_ENABLED,
                                                   PREV_TS_DISTANCES_INCLUDED_EXPLICITLY_ENABLED, DIRECTION_ENABLED)
                    input_data_df = pd.DataFrame(input_data_test)
                    input_data_test = input_data_df.iloc[:, :].values
                    input_data_test = input_data_test.astype('float32')
                    reframed_test = series_to_supervised(input_data_test, n_steps, 1, NUM_USERS_TEST)
                    input_data_test = reframed_test

                    # The last 20 % of the rows form the test set.
                    test_size = int(0.2 * len(input_data_test))
                    values_test = np.array(input_data_test)
                    test = values_test[-test_size:, :]
                    # Raw input lines matching the test rows (the most
                    # recently parsed lines belong to this test file).
                    all_lines_test = all_lines[(-len(test)):]
                    test_X, test_y = test[:, :-n_features_predict], test[:, -n_features_predict:]
                    # Reuse the scaling learnt on the training inputs; do not
                    # refit the scaler on test data.
                    test_X = scaler_train.transform(test_X)

                    ######################################
                    ### make a prediction for test set ###
                    ######################################
                    yhat = model.predict(test_X)
                    inv_yhat_probs = yhat
                    # Convert float to bool
                    inv_y_test = test_y > 0.5
                    inv_yhat_test = yhat > 0.5
                    inv_y_test_reshaped = inv_y_test.ravel()
                    inv_yhat_test_reshaped = inv_yhat_test.ravel()

                    test_precision = precision_score(inv_y_test_reshaped, inv_yhat_test_reshaped)
                    print("Test precision: {:.3f}".format(test_precision), file=sys.stderr)
                    test_recall = recall_score(inv_y_test_reshaped, inv_yhat_test_reshaped)
                    print("Test recall: {:.3f}".format(test_recall), file=sys.stderr)
                    test_f1 = f1_score(inv_y_test_reshaped, inv_yhat_test_reshaped)
                    print("Test f1-score: {:.3f}".format(test_f1), file=sys.stderr)
                    test_accuracy = accuracy_score(inv_y_test_reshaped, inv_yhat_test_reshaped)
                    print("Test accuracy: {:.3f}".format(test_accuracy), file=sys.stderr)

                    # Naive baseline: predict the visibility observed one
                    # timestamp (NUM_USERS_TEST rows) earlier.
                    inv_y_no_reshaped = inv_y_test[NUM_USERS_TEST:]
                    naive_y_no_reshaped = inv_y_test[:-NUM_USERS_TEST]
                    inv_y = inv_y_no_reshaped.ravel()
                    naive_y = naive_y_no_reshaped.ravel()

                    precision = precision_score(inv_y, naive_y)
                    print("", file=sys.stderr)
                    print("Naive test precision: {:.3f}".format(precision), file=sys.stderr)
                    recall = recall_score(inv_y, naive_y)
                    print("Naive test recall: {:.3f}".format(recall), file=sys.stderr)
                    f1 = f1_score(inv_y, naive_y)
                    print("Naive test f1-score: {:.3f}".format(f1), file=sys.stderr)
                    accuracy = accuracy_score(inv_y, naive_y)
                    print("Naive test accuracy: {:.3f}".format(accuracy), file=sys.stderr)

                    # AVAILABILITY METRIC
                    # NN-based: Pick the one with the highest prob
                    chosen_bs_ids = [np.argmax(x) for x in inv_yhat_probs[NUM_USERS_TEST:]]
                    availability = [truth[bs_id] for truth, bs_id in zip(inv_y_no_reshaped, chosen_bs_ids)]
                    print("\nTest availability: {:.3f}".format(
                        sum(availability) / len(availability)), file=sys.stderr)
                    test_availability = availability

                    # Naive: Pick random one between the True ones in the previous interval
                    chosen_bs_ids = []
                    for previous_row in naive_y_no_reshaped:
                        visible_ids = [i for i, seen in enumerate(previous_row) if seen]
                        chosen_bs_ids.append(random.choice(visible_ids) if visible_ids else 0)
                    availability = [truth[bs_id] for truth, bs_id in zip(inv_y_no_reshaped, chosen_bs_ids)]
                    print("Naive test availability: {:.3f}".format(
                        sum(availability) / len(availability)), file=sys.stderr)
                    naive_availability = availability

                    # Ideal: Pick a real True one
                    chosen_bs_ids = [np.argmax(x) for x in inv_y_test]
                    availability = [truth[bs_id] for truth, bs_id in zip(inv_y_test, chosen_bs_ids)]
                    print("Ideal test availability: {:.3f}".format(
                        sum(availability) / len(availability)), file=sys.stderr)
                    ideal_availability = availability

                    # One output line per test row: the raw input line
                    # followed by the [0, 1] probability predicted for each BS.
                    with open(OUTFILENAME, 'w') as outfile:
                        for raw_line, probs in zip(all_lines_test, inv_yhat_probs):
                            print("{};{}".format(raw_line, ";".join(
                                str(float(x)) for x in probs)), file=outfile)

                    # aggregate by timestamp: drop the incomplete tail, then
                    # average over the NUM_USERS_TEST users of each timestamp
                    test_availability_trimmed = test_availability[:(len(test_availability) - (len(test_availability) % NUM_USERS_TEST))]
                    naive_availability_trimmed = naive_availability[:(len(naive_availability) - (len(naive_availability) % NUM_USERS_TEST))]
                    ideal_availability_trimmed = ideal_availability[:(len(ideal_availability) - (len(ideal_availability) % NUM_USERS_TEST))]
                    test_availability_aggregated = [sum(x) / len(x) for x in np.reshape(test_availability_trimmed, (-1, NUM_USERS_TEST))]
                    naive_availability_aggregated = [sum(x) / len(x) for x in np.reshape(naive_availability_trimmed, (-1, NUM_USERS_TEST))]
                    ideal_availability_aggregated = [sum(x) / len(x) for x in np.reshape(ideal_availability_trimmed, (-1, NUM_USERS_TEST))]

                    inv_y_test_reshaped_trimmed = inv_y_test_reshaped[:(len(inv_y_test_reshaped) - (len(inv_y_test_reshaped) % (NUM_USERS_TEST * NUM_BS)))]
                    inv_yhat_test_reshaped_trimmed = inv_yhat_test_reshaped[:(len(inv_yhat_test_reshaped) - (len(inv_yhat_test_reshaped) % (NUM_USERS_TEST * NUM_BS)))]
                    inv_y_test_aggregated = np.reshape(inv_y_test_reshaped_trimmed, (-1, (NUM_USERS_TEST * NUM_BS)))
                    inv_yhat_test_aggregated = np.reshape(inv_yhat_test_reshaped_trimmed, (-1, (NUM_USERS_TEST * NUM_BS)))

                    test_precision_aggregated = [precision_score(y, yhat) for y, yhat in zip(inv_y_test_aggregated, inv_yhat_test_aggregated)]
                    test_recall_aggregated = [recall_score(y, yhat) for y, yhat in zip(inv_y_test_aggregated, inv_yhat_test_aggregated)]
                    test_f1_aggregated = [f1_score(y, yhat) for y, yhat in zip(inv_y_test_aggregated, inv_yhat_test_aggregated)]
                    test_accuracy_aggregated = [accuracy_score(y, yhat) for y, yhat in zip(inv_y_test_aggregated, inv_yhat_test_aggregated)]

                    test_precision_mci = mean_confidence_interval_v2(test_precision_aggregated)
                    test_recall_mci = mean_confidence_interval_v2(test_recall_aggregated)
                    test_f1_mci = mean_confidence_interval_v2(test_f1_aggregated)
                    test_accuracy_mci = mean_confidence_interval_v2(test_accuracy_aggregated)
                    test_availability_mci = mean_confidence_interval_v2(test_availability_aggregated)
                    naive_availability_mci = mean_confidence_interval_v2(naive_availability_aggregated)
                    ideal_availability_mci = mean_confidence_interval_v2(ideal_availability_aggregated)

                    print("test_precision_mci = {}".format(test_precision_mci), file=sys.stderr)
                    print("test_recall_mci = {}".format(test_recall_mci), file=sys.stderr)
                    print("test_f1_mci = {}".format(test_f1_mci), file=sys.stderr)
                    print("test_accuracy_mci = {}".format(test_accuracy_mci), file=sys.stderr)
                    print("test_availability_mci = {}".format(test_availability_mci), file=sys.stderr)
                    print("naive_availability_mci = {}".format(naive_availability_mci), file=sys.stderr)
                    print("ideal_availability_mci = {}".format(ideal_availability_mci), file=sys.stderr)

                    # Machine-readable summary line on stdout.
                    print("{} {} {} {} {} {} {} {} {} {} {} {} {}".format(PROFILE, NUM_USERS_TRAIN, NUM_USERS_TEST, NUM_BS, n_steps, MAX_TS,
                                                                          test_precision_mci, test_recall_mci, test_f1_mci,
                                                                          test_accuracy_mci, test_availability_mci,
                                                                          naive_availability_mci, ideal_availability_mci))