Skip to content

Commit

Permalink
DextranShape uses more splines and resampling to get the offset; the lower and upper bounds are found much more accurately, which allows the offset calculation to be more accurate (previously the system was heavily biased towards the sampling time points, which meant the space was not continuous for the kernel density estimator).
Browse files Browse the repository at this point in the history

Added an alt line format for writing out generation data to switch to 1-score when the score is very close to 1 (makes it easier to see progress)
  • Loading branch information
Immudzen committed Jan 6, 2020
1 parent 0130ac3 commit a74e036
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 38 deletions.
139 changes: 103 additions & 36 deletions CADETMatch/scores/dextranShape.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,69 +17,55 @@

def run(sim_data, feature):
    """Special score designed for dextran.

    Looks at only the front side of the peak up to the point of maximum
    slope and pins a value at the elbow in addition to the top.  The
    simulated front is cut with cut_front() onto the experiment's dense
    resampled grid, scored with a Pearson/spline comparison, and an SSE is
    computed on the simulation's own time axis.

    NOTE(review): reconstructed from a diff that interleaved pre/post-commit
    lines; the superseded old scoring path was removed.
    """
    max_value = feature['max_value']

    sim_time_values, sim_data_values = util.get_times_values(sim_data['simulation'], feature)

    if max(sim_data_values) < max_value:
        # The simulation never reaches the experimental target value;
        # clamp instead of hard-failing so the score stays comparable.
        max_value = max(sim_data_values)

    exp_time_zero = feature['exp_time_zero']
    exp_data_zero = feature['exp_data_zero']

    # Cut the simulated front using the bounds and smoothing parameters
    # pre-computed from the experimental data in setup().
    sim_data_zero = cut_front(sim_time_values, sim_data_values, exp_time_zero,
                              feature['min_value_front'], feature['max_value_front'],
                              feature['smoothing_factor'], feature['critical_frequency'])

    pearson, diff_time = score.pearson_spline(exp_time_zero, exp_data_zero, sim_data_zero)

    exp_data_zero_sse = feature['exp_data_zero_sse']
    # Re-sample the cut simulated front onto the simulation time grid so the
    # SSE is computed on the same axis as the stored experimental SSE data.
    sim_data_zero_sse = scipy.interpolate.InterpolatedUnivariateSpline(exp_time_zero, sim_data_zero, ext=1)(sim_time_values)

    temp = [pearson,
            feature['offsetTimeFunction'](numpy.abs(diff_time)),
            ]

    data = (temp, util.sse(sim_data_zero_sse, exp_data_zero_sse), len(sim_data_zero_sse),
            sim_time_values, sim_data_zero_sse, exp_data_zero_sse, [1.0 - i for i in temp])

    return data

def setup(sim, feature, selectedTimes, selectedValues, CV_time, abstol, cache):
    """Build the dextran feature dictionary from the experimental data.

    Locates the front of the experimental dextran peak (elbow up to the
    point of maximum slope) via cut_front_find, pre-computes the zeroed
    front on a dense resampled grid plus an SSE version projected back onto
    the original sampling times, and stores everything run() needs.

    NOTE(review): reconstructed from a diff that interleaved pre/post-commit
    lines; the superseded inline smoothing/argmax code was removed in favor
    of the cut_front_find call.
    """
    temp = {}

    name = '%s_%s' % (sim.root.experiment_name, feature['name'])

    # Find the front bounds and the resampled, zeroed experimental front.
    exp_time_zero, exp_data_zero, min_time, min_value, max_time, max_value, s, crit_fs = cut_front_find(selectedTimes, selectedValues, name, cache)

    multiprocessing.get_logger().info("Dextran %s start: %s stop: %s max value: %s", name,
                                      min_time, max_time, max_value)

    # Project the dense front back onto the original sampling times so run()
    # can compute an SSE on the simulation's own time axis.
    exp_data_zero_sse = scipy.interpolate.InterpolatedUnivariateSpline(exp_time_zero, exp_data_zero, ext=1)(selectedTimes)

    temp['min_time'] = feature['start']
    temp['max_time'] = feature['stop']
    temp['max_value'] = max_value

    temp['min_time_front'] = min_time
    temp['min_value_front'] = min_value
    temp['max_time_front'] = max_time
    temp['max_value_front'] = max_value

    temp['exp_time_zero'] = exp_time_zero
    temp['exp_data_zero'] = exp_data_zero
    temp['exp_data_zero_sse'] = exp_data_zero_sse
    temp['offsetTimeFunction'] = score.time_function_decay_cv(CV_time, selectedTimes, max_time)
    temp['peak_max'] = max_value
    temp['smoothing_factor'] = s
    temp['critical_frequency'] = crit_fs
    return temp

def headers(experimentName, feature):
Expand All @@ -89,7 +75,88 @@ def headers(experimentName, feature):
]
return temp

def cut_front_find(times, values, name, cache):
    """Locate the dextran front on the experimental data and resample it.

    Smooths *values*, takes the time of maximum slope as the top of the
    front and the first point reaching 1% of that maximum as the bottom,
    refines both locations by optimizing on splines, then resamples the
    smoothed curve at 100 points/second and zeroes everything outside the
    front so only the rising edge remains.

    Returns (new_times, data_zero, min_time, min_value, max_time,
    max_value, s, crit_fs) where s/crit_fs are the smoothing parameters.
    """
    s, crit_fs = smoothing.find_smoothing_factors(times, values, name, cache)
    values_der = smoothing.smooth_data_derivative(times, values, crit_fs, s)
    smooth_value = smoothing.smooth_data(times, values, crit_fs, s)

    spline_der = scipy.interpolate.InterpolatedUnivariateSpline(times, values_der, ext=1)
    spline = scipy.interpolate.InterpolatedUnivariateSpline(times, smooth_value, ext=1)

    # Refine the time of maximum slope, seeded from the raw argmax.
    top_guess = times[numpy.argmax(values)]
    top_result = scipy.optimize.minimize(lambda t: -spline_der(t), top_guess, method='powell')
    max_time = float(top_result.x)
    max_value = spline(max_time)

    # Refine where the smoothed curve first reaches 1% of the front maximum.
    bottom_guess = times[numpy.argmax(smooth_value >= 1e-2 * max_value)]
    bottom_result = scipy.optimize.minimize(lambda t: abs(spline(t) - 1e-2 * max_value), bottom_guess, method='powell')
    min_time = float(bottom_result.x)
    min_value = spline(min_time)

    # Resample to 100 points/second so the front lives on a dense, uniform
    # grid rather than being biased towards the sampling time points.
    needed_points = int((times[-1] - times[0]) * 100)
    new_times = numpy.linspace(times[0], times[-1], needed_points)
    new_values = spline(new_times)

    upper_idx = numpy.argmax(new_values >= max_value)
    lower_idx = numpy.argmax(new_values >= min_value)

    data_zero = numpy.zeros(needed_points)
    data_zero[lower_idx:upper_idx + 1] = new_values[lower_idx:upper_idx + 1]

    return new_times, data_zero, min_time, min_value, max_time, max_value, s, crit_fs

def cut_front(times, values, new_times, min_value, max_value, s, crit_fs):
    """Cut the simulated dextran front using precomputed experimental bounds.

    Smooths the simulated *values* with the experiment's smoothing
    parameters, refines the times at which the smoothed curve crosses
    *min_value* and *max_value* via spline optimization, evaluates the
    spline on *new_times* (the experiment's dense resampled grid) and
    zeroes everything outside the front.
    """
    smooth_value = smoothing.smooth_data(times, values, crit_fs, s)
    spline = scipy.interpolate.InterpolatedUnivariateSpline(times, smooth_value, ext=1)

    # Refine where the curve reaches the experimental front maximum.
    top_guess = times[numpy.argmax(values >= max_value)]
    top_result = scipy.optimize.minimize(lambda t: abs(spline(t) - max_value), top_guess, method='powell')
    max_value = spline(float(top_result.x))

    # Refine where the curve reaches the experimental front minimum.
    bottom_guess = times[numpy.argmax(values >= min_value)]
    bottom_result = scipy.optimize.minimize(lambda t: abs(spline(t) - min_value), bottom_guess, method='powell')
    min_value = spline(float(bottom_result.x))

    new_values = spline(new_times)

    upper_idx = numpy.argmax(new_values >= max_value)
    lower_idx = numpy.argmax(new_values >= min_value)

    data_zero = numpy.zeros(len(new_times))
    data_zero[lower_idx:upper_idx + 1] = new_values[lower_idx:upper_idx + 1]

    return data_zero



Expand Down
10 changes: 9 additions & 1 deletion CADETMatch/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,9 +848,17 @@ def writeProgress(cache, generation, population, halloffame, meta_halloffame, gr
population_product_best = meta_max[0]

line_format = 'Generation: %s \tPopulation: %s \tAverage Score: %.3g \tBest: %.3g \tMinimum Score: %.3g \tBest: %.3g \tProduct Score: %.3g \tBest: %.3g'

alt_line_format = 'Generation: %s \tPopulation: %s \t1 - Average Score: %.3e \tBest: %.3e \t1 - Minimum Score: %.3e \tBest: %.3e \t1 - Product Score: %.3e \tBest: %.3e'

if line_log:
multiprocessing.get_logger().info(line_format, generation, len(population),
if any(meta_max > 0.995):
multiprocessing.get_logger().info(alt_line_format, generation, len(population),
1-population_average, 1-population_average_best,
1-population_min, 1-population_min_best,
1-population_product, 1-population_product_best)
else:
multiprocessing.get_logger().info(line_format, generation, len(population),
population_average, population_average_best,
population_min, population_min_best,
population_product, population_product_best)
Expand Down
2 changes: 1 addition & 1 deletion Examples/MCMC/Dextran/MCMC_dextran_nsga3.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"checkpointFile": "check",
"searchMethod": "NSGA3",
"population": 12,
"finalGradRefinement": 0,
"finalGradRefinement": 1,
"stallGenerations": 10,
"continueMCMC": 1,
"normalizeOutput": 1,
Expand Down

0 comments on commit a74e036

Please sign in to comment.