-
Notifications
You must be signed in to change notification settings - Fork 0
/
EQO_ga.py
104 lines (87 loc) · 4.45 KB
/
EQO_ga.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
from scipy.stats import variation
from geneticalgorithm import geneticalgorithm as ga
def EQO_ga(pattern=None, M=None, y=None, pk=None, Nmax=None, amin=0, amax=1, maxIter=100, popSize=50, N_keep = False):
if y is None:
y = np.ones((M.shape[0],))
pattern = "u"
if pk is None:
pk = np.zeros((M.shape[1],))
if Nmax is None:
Nmax = M.shape[1]
an = np.mean(M, axis=0)
m = M.shape[0]
n = M.shape[1]
if N_keep == True:
pen = 1000
else:
pen = np.sqrt(np.finfo(float).max)
def c0(x):
return 1 if np.min(x - pk) <0 else 0
def c1(x):
return np.sum(x) - Nmax
def c2(x):
return amin - np.dot(an, x)
def c3(x):
return np.dot(an, x) - amax
def c4(x):
return abs(np.sum(x) - Nmax)
if pattern == "c":
def fitness(x):
M0 = M - np.mean(M, axis=0)
y0 = y - np.mean(y)
P = np.dot(M0.T, M0)
Q2 = np.dot(M0.T, y0)
if N_keep == True:
return -(np.dot(x.T, Q2) / np.sqrt(np.dot(np.dot(x.T, P), x)) - pen * max(c4(x), 0) - pen * c0(x))
else:
return -(np.dot(x.T, Q2) / np.sqrt(np.dot(np.dot(x.T, P), x)) - pen * max(c1(x), 0) - pen * c0(x))
elif pattern == "d":
def fitness(x):
M0 = M - np.mean(M, axis=0)
y0 = y # in here, y is a m*c matrix where c reprensents number of clusters
L = np.diag(1 / np.sqrt(np.sum(y0, axis=0)))
P = np.dot(M0.T, M0)
Q = np.dot(np.dot(M0.T, y0), L) @ np.dot(L, y0.T) @ M0
if N_keep == True:
return -(np.dot(np.dot(x.T, Q), x) / np.dot(np.dot(x.T, P), x) - pen * max(c4(x), 0) - pen * c0(x))
else:
return -(np.dot(np.dot(x.T, Q), x) / np.dot(np.dot(x.T, P), x) - pen * max(c1(x), 0) - pen * c0(x))
elif pattern == "u":
def fitness(x):
M0 = M
e = np.ones((m, 1))
P = np.dot(M0.T, M0) - ((2 / n) * np.dot(np.dot(M0.T, e), np.dot(e.T, M0))) + ((1 / (n ** 2)) * np.dot(np.dot(M0.T, e), np.dot(np.dot(e, e.T), M0)))
Q = (1 / (n ** 2)) * np.dot(np.dot(M0.T, e), np.dot(e.T, M0))
return -(np.dot(np.dot(x.T, Q), x) / np.dot(np.dot(x.T, P), x) - pen * max(c1(x), 0) - pen * max(c2(x), 0) - pen * max(c3(x), 0) - pen * c0(x))
algorithm = ga(function=fitness, dimension=n, variable_type='bool', variable_boundaries=[(0, 1)]*n,
algorithm_parameters={'max_num_iteration':maxIter,\
'population_size':popSize,\
'mutation_probability':0.1,\
'elit_ratio': 0.01,\
'crossover_probability': 0.5,\
'parents_portion': 0.3,\
'crossover_type':'uniform',\
'max_iteration_without_improv':100})
algorithm.run()
x = algorithm.output_dict['variable']
fitness_val = algorithm.output_dict['function']
if pattern == "c":
members = np.array([col for col, val in zip(M.T, x) if val == 1])
abundance = np.sum(np.hstack((np.zeros((m, 2)), M[:, x == 1])), axis=1)
performance = np.corrcoef(abundance, y)[0, 1]
return {'fitness': fitness_val, 'x': x, 'members': members, 'abundance': abundance, 'performance': performance}
elif pattern == "d":
solution1 = algorithm.output_dict['variable']
members = np.array([col for col, val in zip(M.T, solution1) if val == 1])
abundance = np.sum(np.hstack((np.zeros((m, 2)), M[:, solution1 == 1])), axis=1)
s = abundance - np.mean(abundance)
y0 = y # in here, y is a m*c matrix where c reprensents number of clusters
L = np.diag(1 / np.sqrt(np.sum(y0, axis=0)))
R2 = np.dot(np.dot(s.T, y), L) @ np.dot(L, y.T) @ s / np.dot(s.T, s)
return {'fitness': fitness_val, 'x': solution1, 'members': members, 'abundance': abundance, 'performance': R2}
elif pattern == "u":
members = np.array([col for col, val in zip(M.T, x) if val == 1])
abundance = np.sum(np.hstack((np.zeros((m, 2)), M[:, x == 1])), axis=1)
CV = variation(abundance)
return {'fitness': fitness_val, 'x': x, 'members': members, 'abundance': abundance, 'performance': CV}