Skip to content

Commit

Permalink
DextranShape uses more splines and resampling to get the offset; the lower and upper bounds are found much more accurately, which allows the offset calculation to be more accurate (previously the system was heavily biased towards the sampling time points, which meant the space was not continuous for the kernel density estimator).
Browse files Browse the repository at this point in the history

Added an alt line format for writing out generation data to switch to 1-score when the score is very close to 1 (makes it easier to see progress)
  • Loading branch information
Immudzen committed Jan 6, 2020
1 parent 0130ac3 commit a74e036
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 38 deletions.
139 changes: 103 additions & 36 deletions CADETMatch/scores/dextranShape.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,69 +17,55 @@

def run(sim_data, feature):
    """Special score designed for dextran.

    Looks at only the front side of the peak up to the point of maximum
    slope and pins a value at the elbow in addition to the top.  The
    simulated front is cut with cut_front() onto the experiment's dense
    resampled grid, scored with a Pearson/spline comparison, and an SSE is
    computed on the simulation's own time axis.

    NOTE(review): reconstructed from a diff that interleaved pre/post-commit
    lines; the superseded old scoring path was removed.
    """
    max_value = feature['max_value']

    sim_time_values, sim_data_values = util.get_times_values(sim_data['simulation'], feature)

    if max(sim_data_values) < max_value:
        # The simulation never reaches the experimental target value;
        # clamp instead of hard-failing so the score stays comparable.
        max_value = max(sim_data_values)

    exp_time_zero = feature['exp_time_zero']
    exp_data_zero = feature['exp_data_zero']

    # Cut the simulated front using the bounds and smoothing parameters
    # pre-computed from the experimental data in setup().
    sim_data_zero = cut_front(sim_time_values, sim_data_values, exp_time_zero,
                              feature['min_value_front'], feature['max_value_front'],
                              feature['smoothing_factor'], feature['critical_frequency'])

    pearson, diff_time = score.pearson_spline(exp_time_zero, exp_data_zero, sim_data_zero)

    exp_data_zero_sse = feature['exp_data_zero_sse']
    # Re-sample the cut simulated front onto the simulation time grid so the
    # SSE is computed on the same axis as the stored experimental SSE data.
    sim_data_zero_sse = scipy.interpolate.InterpolatedUnivariateSpline(exp_time_zero, sim_data_zero, ext=1)(sim_time_values)

    temp = [pearson,
            feature['offsetTimeFunction'](numpy.abs(diff_time)),
            ]

    data = (temp, util.sse(sim_data_zero_sse, exp_data_zero_sse), len(sim_data_zero_sse),
            sim_time_values, sim_data_zero_sse, exp_data_zero_sse, [1.0 - i for i in temp])

    return data

def setup(sim, feature, selectedTimes, selectedValues, CV_time, abstol, cache):
    """Build the dextran feature dictionary from the experimental data.

    Locates the front of the experimental dextran peak (elbow up to the
    point of maximum slope) via cut_front_find, pre-computes the zeroed
    front on a dense resampled grid plus an SSE version projected back onto
    the original sampling times, and stores everything run() needs.

    NOTE(review): reconstructed from a diff that interleaved pre/post-commit
    lines; the superseded inline smoothing/argmax code was removed in favor
    of the cut_front_find call.
    """
    temp = {}

    name = '%s_%s' % (sim.root.experiment_name, feature['name'])

    # Find the front bounds and the resampled, zeroed experimental front.
    exp_time_zero, exp_data_zero, min_time, min_value, max_time, max_value, s, crit_fs = cut_front_find(selectedTimes, selectedValues, name, cache)

    multiprocessing.get_logger().info("Dextran %s start: %s stop: %s max value: %s", name,
                                      min_time, max_time, max_value)

    # Project the dense front back onto the original sampling times so run()
    # can compute an SSE on the simulation's own time axis.
    exp_data_zero_sse = scipy.interpolate.InterpolatedUnivariateSpline(exp_time_zero, exp_data_zero, ext=1)(selectedTimes)

    temp['min_time'] = feature['start']
    temp['max_time'] = feature['stop']
    temp['max_value'] = max_value

    temp['min_time_front'] = min_time
    temp['min_value_front'] = min_value
    temp['max_time_front'] = max_time
    temp['max_value_front'] = max_value

    temp['exp_time_zero'] = exp_time_zero
    temp['exp_data_zero'] = exp_data_zero
    temp['exp_data_zero_sse'] = exp_data_zero_sse
    temp['offsetTimeFunction'] = score.time_function_decay_cv(CV_time, selectedTimes, max_time)
    temp['peak_max'] = max_value
    temp['smoothing_factor'] = s
    temp['critical_frequency'] = crit_fs
    return temp

def headers(experimentName, feature):
Expand All @@ -89,7 +75,88 @@ def headers(experimentName, feature):
]
return temp

def cut_front_find(times, values, name, cache):
    """Locate the dextran front on the experimental data and resample it.

    Smooths *values*, takes the time of maximum slope as the top of the
    front and the first point reaching 1% of that maximum as the bottom,
    refines both locations by optimizing on splines, then resamples the
    smoothed curve at 100 points/second and zeroes everything outside the
    front so only the rising edge remains.

    Returns (new_times, data_zero, min_time, min_value, max_time,
    max_value, s, crit_fs) where s/crit_fs are the smoothing parameters.
    """
    s, crit_fs = smoothing.find_smoothing_factors(times, values, name, cache)
    values_der = smoothing.smooth_data_derivative(times, values, crit_fs, s)
    smooth_value = smoothing.smooth_data(times, values, crit_fs, s)

    spline_der = scipy.interpolate.InterpolatedUnivariateSpline(times, values_der, ext=1)
    spline = scipy.interpolate.InterpolatedUnivariateSpline(times, smooth_value, ext=1)

    # Refine the time of maximum slope, seeded from the raw argmax.
    top_guess = times[numpy.argmax(values)]
    top_result = scipy.optimize.minimize(lambda t: -spline_der(t), top_guess, method='powell')
    max_time = float(top_result.x)
    max_value = spline(max_time)

    # Refine where the smoothed curve first reaches 1% of the front maximum.
    bottom_guess = times[numpy.argmax(smooth_value >= 1e-2 * max_value)]
    bottom_result = scipy.optimize.minimize(lambda t: abs(spline(t) - 1e-2 * max_value), bottom_guess, method='powell')
    min_time = float(bottom_result.x)
    min_value = spline(min_time)

    # Resample to 100 points/second so the front lives on a dense, uniform
    # grid rather than being biased towards the sampling time points.
    needed_points = int((times[-1] - times[0]) * 100)
    new_times = numpy.linspace(times[0], times[-1], needed_points)
    new_values = spline(new_times)

    upper_idx = numpy.argmax(new_values >= max_value)
    lower_idx = numpy.argmax(new_values >= min_value)

    data_zero = numpy.zeros(needed_points)
    data_zero[lower_idx:upper_idx + 1] = new_values[lower_idx:upper_idx + 1]

    return new_times, data_zero, min_time, min_value, max_time, max_value, s, crit_fs

def cut_front(times, values, new_times, min_value, max_value, s, crit_fs):
    """Cut the simulated dextran front using precomputed experimental bounds.

    Smooths the simulated *values* with the experiment's smoothing
    parameters, refines the times at which the smoothed curve crosses
    *min_value* and *max_value* via spline optimization, evaluates the
    spline on *new_times* (the experiment's dense resampled grid) and
    zeroes everything outside the front.
    """
    smooth_value = smoothing.smooth_data(times, values, crit_fs, s)
    spline = scipy.interpolate.InterpolatedUnivariateSpline(times, smooth_value, ext=1)

    # Refine where the curve reaches the experimental front maximum.
    top_guess = times[numpy.argmax(values >= max_value)]
    top_result = scipy.optimize.minimize(lambda t: abs(spline(t) - max_value), top_guess, method='powell')
    max_value = spline(float(top_result.x))

    # Refine where the curve reaches the experimental front minimum.
    bottom_guess = times[numpy.argmax(values >= min_value)]
    bottom_result = scipy.optimize.minimize(lambda t: abs(spline(t) - min_value), bottom_guess, method='powell')
    min_value = spline(float(bottom_result.x))

    new_values = spline(new_times)

    upper_idx = numpy.argmax(new_values >= max_value)
    lower_idx = numpy.argmax(new_values >= min_value)

    data_zero = numpy.zeros(len(new_times))
    data_zero[lower_idx:upper_idx + 1] = new_values[lower_idx:upper_idx + 1]

    return data_zero



Expand Down
10 changes: 9 additions & 1 deletion CADETMatch/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,9 +848,17 @@ def writeProgress(cache, generation, population, halloffame, meta_halloffame, gr
population_product_best = meta_max[0]

line_format = 'Generation: %s \tPopulation: %s \tAverage Score: %.3g \tBest: %.3g \tMinimum Score: %.3g \tBest: %.3g \tProduct Score: %.3g \tBest: %.3g'

alt_line_format = 'Generation: %s \tPopulation: %s \t1 - Average Score: %.3e \tBest: %.3e \t1 - Minimum Score: %.3e \tBest: %.3e \t1 - Product Score: %.3e \tBest: %.3e'

if line_log:
multiprocessing.get_logger().info(line_format, generation, len(population),
if any(meta_max > 0.995):
multiprocessing.get_logger().info(alt_line_format, generation, len(population),
1-population_average, 1-population_average_best,
1-population_min, 1-population_min_best,
1-population_product, 1-population_product_best)
else:
multiprocessing.get_logger().info(line_format, generation, len(population),
population_average, population_average_best,
population_min, population_min_best,
population_product, population_product_best)
Expand Down
2 changes: 1 addition & 1 deletion Examples/MCMC/Dextran/MCMC_dextran_nsga3.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"checkpointFile": "check",
"searchMethod": "NSGA3",
"population": 12,
"finalGradRefinement": 0,
"finalGradRefinement": 1,
"stallGenerations": 10,
"continueMCMC": 1,
"normalizeOutput": 1,
Expand Down

0 comments on commit a74e036

Please sign in to comment.