# measure_activity.py (218 lines, 7.97 KB)
# (GitHub page-scrape residue removed: notification banner and the
#  displayed line-number gutter carried no file content.)
#
# Measure Activity
#
# Peter Turney, July 11, 2019
#
# Measure the activity in a run using ideas from Bedau and Packard
#
import golly as g
import model_classes as mclass
import model_functions as mfunc
import model_parameters as mparam
import numpy as np
import pickle
import os
import sys
import glob
#
# the path for Golly
#
app_dir = g.getdir("app")
#
# settings for the different layers
#
layer = 4 # the layer that we want to process now
#
# map each layer number to its report version string, its pickle
# sub-directory name, and the basename of its pickle files
# - the table replaces a repetitive if/elif chain; an unknown layer
#   number now fails fast with a KeyError instead of silently being
#   treated as layer 4
#
layer_settings = {
  1: ("layer_1_analysis_v6", "Layer 1", "log-2019-06-08-15h-04m-41s-pickle-"),
  2: ("layer_2_analysis_v6", "Layer 2", "log-2019-06-08-15h-07m-27s-pickle-"),
  3: ("layer_3_analysis_v6", "Layer 3", "log-2019-06-08-15h-25m-26s-pickle-"),
  4: ("layer_4_analysis_v6", "Layer 4", "log-2019-06-08-15h-32m-20s-pickle-"),
}
#
(version, layer_subdir, pickle_basename) = layer_settings[layer]
#
# build the pickle directory path with os.path.join throughout, so the
# result is correct even if app_dir does not end with a path separator
# (string concatenation "app_dir + ..." would silently fuse the names)
#
pickle_dir = os.path.join(app_dir, "Modeling Major Transitions",
                          "Section 4.1", layer_subdir, "pickles1")
#
# the path for the analysis report file
# - os.path.join is used instead of "app_dir + ..." concatenation, so
#   the path is correct even without a trailing separator on app_dir
#
text_path = os.path.join(app_dir, "Experiments", "exper189",
                         version + ".txt")
#
# the path for the spreadsheet file (tsv - tab separated values)
#
spreadsheet_path = os.path.join(app_dir, "Experiments", "exper189",
                                version + ".tsv")
#
# number of top longest-living seeds to examine in detail
#
num_top_seeds = 20
#
# open files for recording results
#
# - use line buffering ("1") so each completed line is flushed
#   immediately and we can watch the results as they are written
# - note: buffering=0 (fully unbuffered) is only valid for binary-mode
#   files in Python 3 and would raise ValueError here; line buffering
#   gives the same "see the results immediately" effect portably
#
text_file = open(text_path, "w", 1)
spreadsheet_file = open(spreadsheet_path, "w", 1)
#
# read the pickles into hash tables where the keys are seed patterns
# and the value for each key is a list of the form
# [(generation_number1, seed_rank1), (generation_number2, seed_rank2), ...]
#
# - for every seed in every generation, calculate its normalized rank in the
#   elite sample, where 1.0 = top of the elite sample and 0.0 = not in the
#   elite sample
#
# - the list for a given key (a given seed) is allowed to be incomplete; we assume
#   that any missing (generation_numberi, seed_ranki) pairs are not in the elite
#   sample and thus have a rank of 0.0
#
num_generations = mparam.num_generations
hash_seed_to_list = {}
#
for i in range(num_generations + 1):
  # load the elite sample of the population from the pickle for the i-th generation
  # - a "with" block guarantees the pickle file is closed even if
  #   pickle.load raises an exception
  pickle_path = os.path.join(pickle_dir, pickle_basename + str(i) + ".bin")
  with open(pickle_path, "rb") as sample_handle: # rb = read binary
    sample = pickle.load(sample_handle)
  # the sample was generated by archive_elite() in model_functions.py, and
  # we know that it has been sorted in order of decreasing fitness by
  # the function find_top_seeds(), therefore we can calculate a
  # normalized rank score for each seed based on its position in the
  # sorted sample list
  # - we want the bottom of the list to have a normalized rank slightly above
  #   zero, so that a missing score is lower than the lowest rank in the list
  sample_size = len(sample)
  assert sample_size == mparam.elite_size
  # iterate through the sample; j is the position of seed in the sample
  for (j, seed) in enumerate(sample):
    # the normalized rank of the j-th seed: 1.0 at the top of the
    # sample, 1/sample_size at the bottom (never exactly 0.0)
    norm_rank = (sample_size - j) / float(sample_size)
    # convert the seed matrix into a string -- this will be the key for the hash table
    # - when we flatten the seed matrix into a string, note that two differently shaped
    #   matrices might possibly flatten to the same string; therefore we prefix the
    #   string with the dimensions of the given seed matrix
    # - flatten the matrix of the seed
    flat_seed = seed.cells.flatten()
    # - add in the dimensions and convert to string
    seed_string = str(seed.xspan) + " " + str(seed.yspan) + \
      " " + "".join(map(str, flat_seed))
    # update the hash table
    # - membership is tested on the dictionary itself; the original
    #   "in hash_seed_to_list.keys()" built a list and scanned it,
    #   which is O(n) per seed on Python 2 (quadratic overall)
    if seed_string in hash_seed_to_list:
      # there might be two or more exact copies of a seed, although this should be
      # rare; in such cases, we take the highest value of the normalized ranks
      last_recorded_generation = hash_seed_to_list[seed_string][-1][0]
      if last_recorded_generation != i:
        # there is no conflict, so add the new pair to the list
        hash_seed_to_list[seed_string].append((i, norm_rank))
      else:
        # there is conflict, so compare the new value with the old
        # value and replace the old value if the new value is larger
        old_value = hash_seed_to_list[seed_string][-1][1]
        if norm_rank > old_value:
          # replace the old pair with the higher-ranked new pair
          hash_seed_to_list[seed_string].pop()
          hash_seed_to_list[seed_string].append((i, norm_rank))
        # if old_value >= norm_rank, no change is required
    else:
      # if seed_string is not yet in hash_seed_to_list ...
      hash_seed_to_list[seed_string] = [(i, norm_rank)]
#
#
# now expand each seed's list of (generation, rank) pairs into a list of
# running (cumulative) rank totals, with one entry per generation
#
# example: - suppose the generations are [0, 1, 2, 3, 4, 5]
#          - suppose the pairs are [(1, 0.8), (3, 0.2)]
#          - then the running totals are [0.0, 0.8, 0.8, 1.0]
#          - the list stops at the generation of the seed's last
#            appearance in the elite sample, because the running total
#            cannot change after that point
#
hash_seed_to_cumulative_scores = {}
#
for seed_string in hash_seed_to_list.keys():
  pair_list = hash_seed_to_list[seed_string]
  assert len(pair_list) > 0
  running_total = 0.0
  expanded_scores = []
  # the pairs were stored in increasing order of generation number, so
  # consume them from the front as the generation counter advances
  (next_gen, next_score) = pair_list.pop(0)
  more_pairs = True
  for gen in range(num_generations + 1):
    if (gen > next_gen) and (not more_pairs):
      # we are past the last recorded pair: stop recording totals
      break
    if gen == next_gen:
      # this seed appeared in the elite sample at this generation
      running_total = running_total + next_score
      if len(pair_list) > 0:
        (next_gen, next_score) = pair_list.pop(0)
      else:
        more_pairs = False
    expanded_scores.append(running_total)
  # record the expanded list of running totals for this seed
  hash_seed_to_cumulative_scores[seed_string] = expanded_scores
#
# find the top num_top_seeds with the highest final cumulative score
# in the last generation
#
# - the final score of a seed is the last entry in its cumulative score
#   list, since the cumulative score cannot change after the seed's
#   last appearance in the elite sample
#
seed_final_score_list = []
#
for seed_string in hash_seed_to_cumulative_scores.keys():
  cumulative_score_list = hash_seed_to_cumulative_scores[seed_string]
  final_score = cumulative_score_list[-1]
  seed_final_score_list.append((seed_string, final_score))
#
# sort the seeds in order of decreasing final score
# - the lambda parameter is named "pair" rather than "tuple", so that
#   it does not shadow the built-in tuple type
#
sorted_seeds = sorted(seed_final_score_list,
                      key = lambda pair: pair[1],
                      reverse = True)
#
top_seeds = sorted_seeds[:num_top_seeds]
#
# write the top seeds to spreadsheet_file
#
# - one row per top seed, with one tab-separated cell per generation
#
for (seed_string, final_score) in top_seeds:
  scores = hash_seed_to_cumulative_scores[seed_string]
  # to make a better looking graph in the spreadsheet, show a space
  # instead of 0.0
  cells = [" " if (score == 0.0) else str(score) for score in scores]
  spreadsheet_file.write("\t".join(cells) + "\n")
#
# write every seed and its final score to text_file, in order of
# decreasing final score
#
# - "%s" formats its argument with str(), so the output matches the
#   previous explicit str() concatenation exactly
#
for (seed_string, final_score) in sorted_seeds:
  text_file.write("%s --> final score = %s\n" % (seed_string, final_score))
#
# close the files for recording results
#
text_file.close()
spreadsheet_file.close()
#
#
#
#