-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvenice.py
196 lines (144 loc) · 5.65 KB
/
venice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 27 12:14:36 2023
@author: Tommaso Giacometti
"""
import numpy as np
import torch
import torch_geometric as pyg
import models
import networkx as nx
import utils
from time import time
import plots
import matplotlib.pyplot as plt
# City to analyse; must be one of the keys of CITY_DATASETS below.
name = 'venice'

# Edge-list file and number of nodes for every supported city.
CITY_DATASETS = {
    'venice': ('/Users/tommygiak/Desktop/VGAE_FNN/data/cities/venice_edge_info.txt', 1840),
    'bologna': ('/Users/tommygiak/Desktop/VGAE_FNN/data/cities/bologna_edge_info.txt', 541),
}
if name not in CITY_DATASETS:
    # Fail here with a clear message rather than with a NameError on
    # `path` / `n_nodes` further down.
    raise ValueError(f'Unknown city {name!r}; expected one of {sorted(CITY_DATASETS)}')
path, n_nodes = CITY_DATASETS[name]

# One row per edge. Columns as used below:
#   0   -> edge number
#   1   -> id of the first endpoint,  2-3 -> its two position values
#   4   -> id of the second endpoint, 5-6 -> its two position values
# ndmin=2 keeps the table two-dimensional even for a single-row file.
edge_table = np.loadtxt(path, dtype=np.float32, ndmin=2)
edge_number = edge_table[:, 0].astype(np.int64)

#Adj matrix
# Shape (2, n_edges): row 0 holds the first endpoints, row 1 the second ones.
edge_index = np.vstack((edge_table[:, 1], edge_table[:, 4])).astype(np.int64)

#Node features
pos1 = edge_table[:, 2:4]
pos2 = edge_table[:, 5:7]
indexes = edge_index.transpose()
# Kept for interactive inspection; not referenced again in this file.
tot = np.hstack((indexes, pos1, pos2))

# Position of every node, taken from the first row that mentions it.
# The first-endpoint column is scanned before the second-endpoint column and
# setdefault never overwrites, so a node listed in both keeps the position
# found in the first-endpoint column (same priority as a row-by-row search).
first_seen = {}
for node_ids, coords in ((edge_index[0], pos1), (edge_index[1], pos2)):
    for node, xy in zip(node_ids.tolist(), coords):
        first_seen.setdefault(node, xy)

# Every id in [0, n_nodes) must appear in at least one edge.
for i in range(n_nodes):
    if i not in first_seen:
        raise IndexError(i)
pos = np.array([first_seen[i] for i in range(n_nodes)], dtype=np.float32)
del first_seen

# node id -> (x, y), used as the layout for the networkx drawings.
# Built directly from the node index so that two nodes sharing the same
# coordinates both keep an entry.
pos_dict = {i: tuple(xy) for i, xy in enumerate(pos)}

edge_index = torch.from_numpy(edge_index)
pos = torch.from_numpy(pos)
# Add the reverse of every edge so the graph is stored as undirected.
edge_index = pyg.utils.to_undirected(edge_index, num_nodes=n_nodes)
data = pyg.data.Data(x=pos, edge_index=edge_index)

print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}') # False
print(f'Has self-loops: {data.has_self_loops()}') # False
print(f'Is undirected: {data.is_undirected()}') # True
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
#%%
# Draw the full street graph; each node is sized by its betweenness
# centrality (scaled by 20) and placed at its coordinates from pos_dict.
G = pyg.utils.to_networkx(data, to_undirected=True)
centrality = nx.betweenness_centrality(G, endpoints=True)
node_size = [20 * score for score in centrality.values()]

fig, ax = plt.subplots(dpi=1000)
nx.draw_networkx_nodes(
    G,
    pos_dict,
    node_size=node_size,
    node_shape='o',
    alpha=0.4,
)
nx.draw_networkx_edges(
    G,
    pos_dict,
    width=0.5,
    edge_color="gainsboro",
)
ax.set_title('Venice street rapresentation')
# Save before show(): the figure is written to disk first, then displayed.
plt.savefig('venice.pdf')
plt.show()
#%%
# Degree statistics of the street graph G: a rank plot on the left and a
# histogram of how many nodes have each degree on the right.
degree_sequence = sorted((degree for _, degree in G.degree()), reverse=True)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
# The plotted graph is the street network loaded from file, not a random
# graph, so the title names the city instead.
fig.suptitle(f"Degree of the {name.capitalize()} street graph")

# Left: degrees sorted from highest to lowest.
ax1.plot(degree_sequence, "b-", marker="o")
ax1.set_title("Degree Rank Plot")
ax1.set_ylabel("Degree")
ax1.set_xlabel("Rank")

# Right: np.unique returns (distinct degrees, their counts), which map
# directly onto the x positions and heights of the bars.
ax2.bar(*np.unique(degree_sequence, return_counts=True))
ax2.set_title("Degree histogram")
ax2.set_xlabel("Degree")
ax2.set_ylabel("# of Nodes")

fig.tight_layout()
plt.show()
#%%
# Train the VGAE on the graph, feed its embeddings to the FNN, then plot and
# print the results of both models. Statement order matters: each step
# consumes objects produced by the previous one.

# Use the GPU when one is available, otherwise the CPU.
# NOTE(review): only the two models are moved to `device` in this cell; no
# tensor is moved here — confirm the models.* helpers handle data placement.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Fresh Data object built from the node positions and the undirected edges.
data = pyg.data.Data(x=pos, edge_index=edge_index)
# Project wrapper around the graph; the meaning of `order` is defined in
# models.Data_Venice and is not visible from this file.
data_model = models.Data_Venice(data, order = 3)
#VGAE
# Input width = number of feature columns of data_model.x.
in_channels = data_model.x.shape[1]
hid_dim = 100
emb_dim = 50
autoencoder = models.VGAE(in_channels, hid_dim, emb_dim).to(device)
# Wall-clock timestamps around the training call, reported further down.
start_vgae = time()
print(f'{plots.Bcolors.HEADER}Training of the VGAE{plots.Bcolors.ENDC}')
lossi_VGAE = autoencoder.train_cycle(data_model, weights=False, epochs=5000, include_neg=False)# Train the VGAE; the returned value is plotted below as its loss
stop_vgae = time()
#Data processing for the FNN
# Run the trained VGAE on the training edges and keep element [0] of its
# output (per the original author's note: z, not logvar), detached.
embedding = autoencoder(data_model.x, data_model.train_pos)[0].detach() #[0] -> To get only z and not logvar
data_model_fnn = models.Data_FNN(embedding, data_model)
#FNN
# Input size is twice the embedding size — presumably one embedding per
# endpoint of a candidate link; TODO confirm against models.FNN.
fnn = models.FNN(emb_dim*2).to(device)
#Train
start_fnn = time()
print(f'{plots.Bcolors.HEADER}Training of the FNN{plots.Bcolors.ENDC}')
# Returns two values, stored as the training and test loss histories.
lossi_fnn, lossi_test_fnn = fnn.train_cycle_fnn(data_model_fnn, epochs=20000)
stop_fnn = time()
#Computational times
print(f'The training of the VGAE took {stop_vgae-start_vgae} sec')
print(f'The training of the FNN took {stop_fnn-start_fnn} sec')
#Plots
#VGAE
plots.plot_loss(lossi_VGAE, mean = 5, tit='Loss of the VGAE')
plots.plot_train_distribution_VGAE(autoencoder, data_model)
plots.plot_test_distribution_VGAE(autoencoder, data_model)
#FNN
plots.plot_loss(lossi_fnn, tit = 'Loss of the FNN', mean = 500)
# Same plot twice: once with test = False, once with test = True.
plots.plot_distribution_FNN(fnn, embedding, data_model_fnn, test = False)
plots.plot_distribution_FNN(fnn, embedding, data_model_fnn, test = True)
#Results
# Collect each model's results as a dict and print them with a heading.
vgae_results = utils.get_argmax_VGAE(autoencoder, data_model)
fnn_results = utils.get_argmax_FNN(fnn, data_model_fnn)
utils.print_dict(vgae_results, part = 'VGAE results in the classification')
utils.print_dict(fnn_results, part = 'FNN result in the classification')
#%%
# Draw the graph that contains only the edges in data_model.train_pos,
# with nodes sized by betweenness centrality (scaled by 20).
only_train_data = pyg.data.Data(
    x=data_model.x,
    edge_index=data_model.train_pos,
)
G1 = pyg.utils.to_networkx(only_train_data, to_undirected=True)
centrality = nx.betweenness_centrality(G1, endpoints=True)
node_size = [20 * score for score in centrality.values()]

fig, ax = plt.subplots(dpi=1000)
nx.draw_networkx_nodes(
    G1,
    pos_dict,
    node_size=node_size,
    node_shape='o',
    alpha=0.4,
)
nx.draw_networkx_edges(
    G1,
    pos_dict,
    width=0.5,
    edge_color="gainsboro",
)
ax.set_title('Venice street rapresentation with missing links')
# Write the figure to disk before displaying it.
plt.savefig('venice_missing.pdf')
plt.show()
#%%
#Reconstructed graph
# Build a graph from the links returned by utils.reconstruct_graph and draw
# it with the same layout and centrality-based node sizes as the other plots.
link_recon = utils.reconstruct_graph(data_model, data_model_fnn, fnn)
# G_recon first holds the PyG Data object, then the networkx graph.
G_recon = pyg.data.Data(x=data.x, edge_index=link_recon)
G_recon = pyg.utils.to_networkx(G_recon, to_undirected=True)
centrality = nx.betweenness_centrality(G_recon, endpoints=True)
node_size = [20 * score for score in centrality.values()]

fig, ax = plt.subplots(dpi=1000)
nx.draw_networkx_nodes(
    G_recon,
    pos_dict,
    node_size=node_size,
    node_shape='o',
    alpha=0.4,
)
nx.draw_networkx_edges(
    G_recon,
    pos_dict,
    width=0.5,
    edge_color="gainsboro",
)
ax.set_title('Venice street rapresentation (reconstruction)')
# Write the figure to disk before displaying it.
plt.savefig('venice_recon.pdf')
plt.show()