import itertools

import numpy as np
import scipy.io as sio
import scipy.sparse as sp
import torch
import torch.nn.functional as F
from sklearn.decomposition import PCA


def adj_matrix(graph):
    """Build a symmetric adjacency matrix from a {node: [neighbors]} dict."""
    edges = []
    for src, neighbors in graph.items():
        edges.extend([[src, dst] for dst in neighbors])
        edges.extend([[dst, src] for dst in neighbors])
    # Sort and deduplicate the edge list before building the COO matrix.
    edges = np.array([k for k, _ in itertools.groupby(sorted(edges))])
    return sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                         (len(graph), len(graph)))
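
# Usage sketch (toy input): an adjacency-list dict with integer node ids
# 0..n-1; the hypothetical graph below yields a symmetric 3x3 COO matrix.
#   g = {0: [1], 1: [0, 2], 2: [1]}
#   A = adj_matrix(g)   # ones at (0, 1), (1, 0), (1, 2), (2, 1)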


def norm_x(x):
    """Row-normalize a dense feature matrix (rows must have nonzero sums)."""
    return np.diag(np.power(x.sum(axis=1), -1).flatten()).dot(x)


def normalize(mx):
    """Row-normalize a sparse matrix (rows with zero sum are left at zero)."""
    rowsum = np.array(mx.sum(1), dtype=np.float32)
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)


def norm_adj_matrix(matrix):
    """Symmetric normalization D^-1/2 (A + I) D^-1/2 of an adjacency matrix."""
    matrix = matrix + sp.eye(matrix.shape[0])
    degree = np.array(matrix.sum(axis=1))
    d_sqrt = sp.diags(np.power(degree, -0.5).flatten())
    return d_sqrt.dot(matrix).dot(d_sqrt)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is deprecated; torch.sparse_coo_tensor is the
    # supported constructor and builds the same tensor.
    return torch.sparse_coo_tensor(indices, values, shape)
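
# Usage sketch (toy input): an identity matrix round-trips cleanly.
#   t = sparse_mx_to_torch_sparse_tensor(sp.eye(3))
#   t.to_dense()   # 3x3 identity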


def csr_2_sparse_tensor_tuple(csr_matrix):
    """Return (indices, values, shape) of a sparse matrix in COO form."""
    coo = sp.coo_matrix(csr_matrix)
    indices = np.transpose(np.vstack((coo.row, coo.col)))
    values = coo.data
    shape = coo.shape
    return indices, values, shape


def feature_compression(features, dim=200):
    """Preprocess features: project onto the top `dim` PCA components."""
    features = features.toarray()
    return PCA(n_components=dim, random_state=0).fit_transform(features)
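
# Usage sketch (synthetic features, illustrative sizes only):
#   X = sp.random(500, 1000, density=0.01, format='lil')
#   X_small = feature_compression(X, dim=200)   # ndarray of shape (500, 200)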


def load_data_citation(path="data/", dataset="citation"):
    """Load a .mat network dataset and prepare tensors for training."""
    file = str(path) + str(dataset)
    net = sio.loadmat(file)
    features, adj, labels = net['attrb'], net['network'], net['group']
    if not isinstance(features, sp.lil_matrix):
        features = sp.lil_matrix(features)
    labels = np.array(labels)
    # Compute PPMI from the aggregated 3-step transition probabilities.
    A_k = AggTranProbMat(adj, 3)
    PPMI_ = ComputePPMI(A_k)
    n_PPMI_ = MyScaleSimMat(PPMI_)  # row-normalized PPMI
    n_PPMI_mx = sp.lil_matrix(n_PPMI_)
    X_n = sparse_mx_to_torch_sparse_tensor(n_PPMI_mx)
    # Symmetrize the adjacency matrix, then row-normalize features and A + I.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    features = normalize(features.toarray())
    adj = normalize(adj + sp.eye(adj.shape[0]))
    features = torch.FloatTensor(features)
    labels = torch.LongTensor(np.argmax(labels, 1))
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    idx_all = torch.LongTensor(np.random.permutation(len(features)))
    return adj, features, labels, idx_all, X_n
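
# Usage sketch (assumes a MATLAB file data/citation.mat containing the
# 'attrb', 'network', and 'group' keys loaded above):
#   adj, features, labels, idx_all, X_n = load_data_citation()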


def load_network(path="data/", dataset="citation"):
    """Load raw (A, X, Y) = (network, attributes, labels) from a .mat file."""
    file = str(path) + str(dataset)
    net = sio.loadmat(file)
    X, A, Y = net['attrb'], net['network'], net['group']
    if not isinstance(X, sp.lil_matrix):
        X = sp.lil_matrix(X)
    return A, X, Y


def accuracy(output, labels):
    """Fraction of predictions (argmax of output) that match the labels."""
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double().sum()
    return correct / len(labels)
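
# Usage sketch (toy logits): both predictions match their labels.
#   out = torch.tensor([[0.9, 0.1], [0.2, 0.8]])
#   accuracy(out, torch.tensor([0, 1]))   # tensor(1., dtype=torch.float64)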


class ConditionalEntropyLoss(torch.nn.Module):
    """Entropy of the softmax distribution over logits, averaged over the batch."""

    def __init__(self):
        super(ConditionalEntropyLoss, self).__init__()

    def forward(self, x):
        # sum_c p_c * log p_c = -H(p); negate to return the mean entropy.
        b = F.softmax(x, dim=1) * F.log_softmax(x, dim=1)
        b = b.sum(dim=1)
        return -1.0 * b.mean(dim=0)
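
# Usage sketch (toy logits): lower values mean more confident predictions.
#   logits = torch.tensor([[2.0, -1.0], [0.1, 0.0]])
#   ConditionalEntropyLoss()(logits)   # mean entropy over the batch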


def MyScaleSimMat(W):
    """L1 row-normalization of a matrix (rows with zero sum stay zero)."""
    rowsum = np.array(np.sum(W, axis=1), dtype=np.float32)
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(W)


def AggTranProbMat(G, step):
    """Aggregated K-step transition probability: sum_{k=1..step} A^k / k."""
    G = MyScaleSimMat(G)
    G = np.asarray(G.todense()) if sp.issparse(G) else np.asarray(G)
    A_k = G
    A = G
    for k in np.arange(2, step + 1):
        A_k = np.matmul(A_k, G)
        A = A + A_k / k
    return A


def ComputePPMI(A):
    """Compute PPMI, given an aggregated K-step transition probability matrix."""
    np.fill_diagonal(A, 0)
    A = MyScaleSimMat(A)
    (p, q) = np.shape(A)
    col = np.sum(A, axis=0)
    col[col == 0] = 1
    with np.errstate(divide='ignore'):
        PPMI = np.log((float(p) * A) / col[None, :])
    PPMI[np.isnan(PPMI)] = 0
    PPMI[PPMI < 0] = 0  # clip negative PMI (and the -inf from log(0)) to zero
    return PPMI
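
# Pipeline sketch (toy path graph): PPMI from aggregated 3-step transitions.
#   A = sp.csr_matrix(np.array([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]]))
#   ppmi = ComputePPMI(AggTranProbMat(A, 3))   # dense, nonnegative, zero diagonal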


def load_adj_label_for_reconstruction(dataset_name):
    """Build the adjacency-reconstruction target plus BCE reweighting terms."""
    A, _, _ = load_network(dataset=dataset_name)
    adj_label = A + sp.eye(A.shape[0])
    adj_label = torch.FloatTensor(adj_label.toarray())
    # Weight positive entries by the ratio of non-edges to edges, and rescale
    # the loss so its magnitude is comparable across graph densities.
    pos_weight = float(A.shape[0] * A.shape[0] - A.sum()) / A.sum()
    pos_weight = torch.from_numpy(np.array(pos_weight).reshape(1, 1))
    norm = A.shape[0] * A.shape[0] / float((A.shape[0] * A.shape[0] - A.sum()) * 2)
    return adj_label, pos_weight, norm
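

if __name__ == "__main__":
    # Minimal smoke test on a toy graph; the data here is illustrative and
    # does not come from the citation datasets the loaders above expect.
    g = {0: [1], 1: [0, 2], 2: [1]}
    A = adj_matrix(g)
    A_hat = norm_adj_matrix(A)
    t = sparse_mx_to_torch_sparse_tensor(A_hat)
    ppmi = ComputePPMI(AggTranProbMat(sp.csr_matrix(A), 3))
    print(t.shape, ppmi.shape)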