# measure_activity.py (218 lines, 7.97 KB)
# (GitHub page-scrape residue removed: notification banner and the
#  displayed line-number gutter carried no file content.)
#
# Measure Activity
#
# Peter Turney, July 11, 2019
#
# Measure the activity in a run using ideas from Bedau and Packard
#
import golly as g
import model_classes as mclass
import model_functions as mfunc
import model_parameters as mparam
import numpy as np
import pickle
import os
import sys
import glob
#
# the path for Golly
#
app_dir = g.getdir("app")
#
# settings for the different layers
#
layer = 4 # the layer that we want to process now
#
# map each layer number to its report version string, its pickle
# sub-directory name, and the basename of its pickle files
# - the table replaces a repetitive if/elif chain; an unknown layer
#   number now fails fast with a KeyError instead of silently being
#   treated as layer 4
#
layer_settings = {
  1: ("layer_1_analysis_v6", "Layer 1", "log-2019-06-08-15h-04m-41s-pickle-"),
  2: ("layer_2_analysis_v6", "Layer 2", "log-2019-06-08-15h-07m-27s-pickle-"),
  3: ("layer_3_analysis_v6", "Layer 3", "log-2019-06-08-15h-25m-26s-pickle-"),
  4: ("layer_4_analysis_v6", "Layer 4", "log-2019-06-08-15h-32m-20s-pickle-"),
}
#
(version, layer_subdir, pickle_basename) = layer_settings[layer]
#
# build the pickle directory path with os.path.join throughout, so the
# result is correct even if app_dir does not end with a path separator
# (string concatenation "app_dir + ..." would silently fuse the names)
#
pickle_dir = os.path.join(app_dir, "Modeling Major Transitions",
                          "Section 4.1", layer_subdir, "pickles1")
#
# the path for the analysis report file
# - os.path.join is used instead of "app_dir + ..." concatenation, so
#   the path is correct even without a trailing separator on app_dir
#
text_path = os.path.join(app_dir, "Experiments", "exper189",
                         version + ".txt")
#
# the path for the spreadsheet file (tsv - tab separated values)
#
spreadsheet_path = os.path.join(app_dir, "Experiments", "exper189",
                                version + ".tsv")
#
# number of top longest-living seeds to examine in detail
#
num_top_seeds = 20
#
# open files for recording results
#
# - use line buffering ("1") so each completed line is flushed
#   immediately and we can watch the results as they are written
# - note: buffering=0 (fully unbuffered) is only valid for binary-mode
#   files in Python 3 and would raise ValueError here; line buffering
#   gives the same "see the results immediately" effect portably
#
text_file = open(text_path, "w", 1)
spreadsheet_file = open(spreadsheet_path, "w", 1)
#
# read the pickles into hash tables where the keys are seed patterns
# and the value for each key is a list of the form
# [(generation_number1, seed_rank1), (generation_number2, seed_rank2), ...]
#
# - for every seed in every generation, calculate its normalized rank in the
#   elite sample, where 1.0 = top of the elite sample and 0.0 = not in the
#   elite sample
#
# - the list for a given key (a given seed) is allowed to be incomplete; we assume
#   that any missing (generation_numberi, seed_ranki) pairs are not in the elite
#   sample and thus have a rank of 0.0
#
num_generations = mparam.num_generations
hash_seed_to_list = {}
#
for i in range(num_generations + 1):
  # load the elite sample of the population from the pickle for the i-th generation
  # - a "with" block guarantees the pickle file is closed even if
  #   pickle.load raises an exception
  pickle_path = os.path.join(pickle_dir, pickle_basename + str(i) + ".bin")
  with open(pickle_path, "rb") as sample_handle: # rb = read binary
    sample = pickle.load(sample_handle)
  # the sample was generated by archive_elite() in model_functions.py, and
  # we know that it has been sorted in order of decreasing fitness by
  # the function find_top_seeds(), therefore we can calculate a
  # normalized rank score for each seed based on its position in the
  # sorted sample list
  # - we want the bottom of the list to have a normalized rank slightly above
  #   zero, so that a missing score is lower than the lowest rank in the list
  sample_size = len(sample)
  assert sample_size == mparam.elite_size
  # iterate through the sample; j is the position of seed in the sample
  for (j, seed) in enumerate(sample):
    # the normalized rank of the j-th seed: 1.0 at the top of the
    # sample, 1/sample_size at the bottom (never exactly 0.0)
    norm_rank = (sample_size - j) / float(sample_size)
    # convert the seed matrix into a string -- this will be the key for the hash table
    # - when we flatten the seed matrix into a string, note that two differently shaped
    #   matrices might possibly flatten to the same string; therefore we prefix the
    #   string with the dimensions of the given seed matrix
    # - flatten the matrix of the seed
    flat_seed = seed.cells.flatten()
    # - add in the dimensions and convert to string
    seed_string = str(seed.xspan) + " " + str(seed.yspan) + \
      " " + "".join(map(str, flat_seed))
    # update the hash table
    # - membership is tested on the dictionary itself; the original
    #   "in hash_seed_to_list.keys()" built a list and scanned it,
    #   which is O(n) per seed on Python 2 (quadratic overall)
    if seed_string in hash_seed_to_list:
      # there might be two or more exact copies of a seed, although this should be
      # rare; in such cases, we take the highest value of the normalized ranks
      last_recorded_generation = hash_seed_to_list[seed_string][-1][0]
      if last_recorded_generation != i:
        # there is no conflict, so add the new pair to the list
        hash_seed_to_list[seed_string].append((i, norm_rank))
      else:
        # there is conflict, so compare the new value with the old
        # value and replace the old value if the new value is larger
        old_value = hash_seed_to_list[seed_string][-1][1]
        if norm_rank > old_value:
          # replace the old pair with the higher-ranked new pair
          hash_seed_to_list[seed_string].pop()
          hash_seed_to_list[seed_string].append((i, norm_rank))
        # if old_value >= norm_rank, no change is required
    else:
      # if seed_string is not yet in hash_seed_to_list ...
      hash_seed_to_list[seed_string] = [(i, norm_rank)]
#
#
# now expand each seed's list of (generation, rank) pairs into a list of
# running (cumulative) rank totals, with one entry per generation
#
# example: - suppose the generations are [0, 1, 2, 3, 4, 5]
#          - suppose the pairs are [(1, 0.8), (3, 0.2)]
#          - then the running totals are [0.0, 0.8, 0.8, 1.0]
#          - the list stops at the generation of the seed's last
#            appearance in the elite sample, because the running total
#            cannot change after that point
#
hash_seed_to_cumulative_scores = {}
#
for seed_string in hash_seed_to_list.keys():
  pair_list = hash_seed_to_list[seed_string]
  assert len(pair_list) > 0
  running_total = 0.0
  expanded_scores = []
  # the pairs were stored in increasing order of generation number, so
  # consume them from the front as the generation counter advances
  (next_gen, next_score) = pair_list.pop(0)
  more_pairs = True
  for gen in range(num_generations + 1):
    if (gen > next_gen) and (not more_pairs):
      # we are past the last recorded pair: stop recording totals
      break
    if gen == next_gen:
      # this seed appeared in the elite sample at this generation
      running_total = running_total + next_score
      if len(pair_list) > 0:
        (next_gen, next_score) = pair_list.pop(0)
      else:
        more_pairs = False
    expanded_scores.append(running_total)
  # record the expanded list of running totals for this seed
  hash_seed_to_cumulative_scores[seed_string] = expanded_scores
#
# find the top num_top_seeds with the highest final cumulative score
# in the last generation
#
# - the final score of a seed is the last entry in its cumulative score
#   list, since the cumulative score cannot change after the seed's
#   last appearance in the elite sample
#
seed_final_score_list = []
#
for seed_string in hash_seed_to_cumulative_scores.keys():
  cumulative_score_list = hash_seed_to_cumulative_scores[seed_string]
  final_score = cumulative_score_list[-1]
  seed_final_score_list.append((seed_string, final_score))
#
# sort the seeds in order of decreasing final score
# - the lambda parameter is named "pair" rather than "tuple", so that
#   it does not shadow the built-in tuple type
#
sorted_seeds = sorted(seed_final_score_list,
                      key = lambda pair: pair[1],
                      reverse = True)
#
top_seeds = sorted_seeds[:num_top_seeds]
#
# write the top seeds to spreadsheet_file
#
# - one row per top seed, with one tab-separated cell per generation
#
for (seed_string, final_score) in top_seeds:
  scores = hash_seed_to_cumulative_scores[seed_string]
  # to make a better looking graph in the spreadsheet, show a space
  # instead of 0.0
  cells = [" " if (score == 0.0) else str(score) for score in scores]
  spreadsheet_file.write("\t".join(cells) + "\n")
#
# write every seed and its final score to text_file, in order of
# decreasing final score
#
# - "%s" formats its argument with str(), so the output matches the
#   previous explicit str() concatenation exactly
#
for (seed_string, final_score) in sorted_seeds:
  text_file.write("%s --> final score = %s\n" % (seed_string, final_score))
#
# close the files for recording results
#
text_file.close()
spreadsheet_file.close()
#
#
#
#