Skip to content

Commit

Permalink
Dextran scores have been updated to better choose endpoints and smoothing
Browse files Browse the repository at this point in the history

Added a function to score.py which uses Pearson correlation against a spline instead of against the array data; this prevents conversion of the spline to an array and back to a spline again
  • Loading branch information
Immudzen committed Jan 14, 2020
1 parent e728a76 commit d15e6fb
Show file tree
Hide file tree
Showing 11 changed files with 151 additions and 45 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,5 @@ __pycache__/
/Examples/MCMC/Dextran/nsga3_mcmc_6_emcee6_fast
/Examples/MCMC/Dextran/dextran_pulse_fast.csv
/Examples/MCMC/Dextran/dextran_pulse_high.h5
/Examples/Example1/Dextran/fit_nsga3_deap_new
/Examples/Example1/Dextran/fit_nsga3_deap_sse
2 changes: 1 addition & 1 deletion CADETMatch.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: CADETMatch
Version: 0.5.13
Version: 0.5.15
Summary: CADETMatch is a parameter estimation and error modeling library for CADET
Home-page: https://github.com/modsim/CADET-Match
Author: William Heymann
Expand Down
2 changes: 1 addition & 1 deletion CADETMatch/CADETMatch.pyproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<IsWindowsApplication>False</IsWindowsApplication>
<InterpreterId>CondaEnv|CondaEnv|CADETMatch</InterpreterId>
<LaunchProvider>Standard Python launcher</LaunchProvider>
<CommandLineArguments>"C:\Users\kosh_000\Documents\Visual Studio 2017\Projects\CADETMatch\Examples\MCMC\Dextran\MCMC_dextran_nsga3.json" 0</CommandLineArguments>
<CommandLineArguments>"F:\Parameter Estimation\Synthetic\Dextran\NSGA3_dextran_SSE.json" 6</CommandLineArguments>
<EnableNativeCodeDebugging>False</EnableNativeCodeDebugging>
<InterpreterArguments>
</InterpreterArguments>
Expand Down
4 changes: 2 additions & 2 deletions CADETMatch/generate_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,9 +472,9 @@ def plot_2d_single(directory_path, header_x, scoreName, data, scores):
if numpy.all(scores <= 0):
scores = numpy.abs(scores)

score_scale = numpy.max(scores)/numpy.min(scores)
#score_scale = numpy.max(scores)/numpy.min(scores)

if score_scale > 1e2:
if scoreName.startswith('1-'):
scores = numpy.log(scores)
scoreName = 'log(%s)' % scoreName

Expand Down
3 changes: 2 additions & 1 deletion CADETMatch/gradFD.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,9 @@ def filterOverlapArea(cache, checkOffspring, cutoff=0.01):
percent = temp_area_overlap / temp_area_total
if percent > cutoff:
temp_offspring.append( (percent, ind) )
multiprocessing.get_logger().info('kept %s with overlap (%s) in gradient descent', ind, percent)
else:
multiprocessing.get_logger().info('removed %s for insufficient overlap in gradient descent', ind)
multiprocessing.get_logger().info('removed %s for insufficient overlap (%s) in gradient descent', ind, percent)
else:
multiprocessing.get_logger().info('removed %s for failure', ind)

Expand Down
40 changes: 40 additions & 0 deletions CADETMatch/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,46 @@ def pearson_spline(exp_time_values, sim_data_values, exp_data_values):

return score, dt

def pearson_spline_fun(exp_time_values, exp_data_values, sim_spline):
    """Score how well a simulation spline matches the experimental data.

    Cross-correlates the two curves on a fine grid to find a coarse time
    offset, refines that offset with a gradient-free search, then computes
    the Pearson correlation of the time-aligned curves.

    Returns a tuple ``(score, dt)`` where ``score`` is the transformed
    Pearson correlation and ``dt`` is the refined time offset.
    """
    # resample on a much finer time step so the cross-correlation peak
    # pins the offset more precisely
    step = 1e-1
    grid = numpy.arange(exp_time_values[0], exp_time_values[-1], step)

    sim_on_grid = sim_spline(grid)
    exp_spline = scipy.interpolate.InterpolatedUnivariateSpline(exp_time_values, exp_data_values, ext=3)
    exp_on_grid = exp_spline(grid)

    # peak of the full cross-correlation gives the coarse lag between curves
    xcorr = scipy.signal.correlate(exp_on_grid, sim_on_grid)
    lag_index = numpy.argmax(xcorr)
    coarse_dt = (lag_index - len(grid) + 1) * step

    # refine the time offset with a deterministic, gradient-free method (Powell's)
    dt = refine_time_fun(coarse_dt, exp_time_values, exp_data_values, sim_spline)

    # Pearson correlation of the experiment against the shifted simulation
    shifted_sim = sim_spline(exp_time_values - dt)
    pear = scipy.stats.pearsonr(exp_data_values, shifted_sim)[0]
    score = pear_corr(pear)

    return score, dt

def refine_time_fun(dt, exp_time_values, exp_data_values, spline):
    """Refine the time offset between experiment and simulation spline.

    Minimizes the sum of squared errors between the experimental data and the
    simulation spline shifted by the offset, starting from the coarse offset
    ``dt``. Powell's method (gradient free) is used because the system is not
    smooth enough for gradient-based methods to work well.

    Returns the refined offset as a plain ``float``.
    """
    exp_time_values = numpy.array(exp_time_values)
    exp_data_values = numpy.array(exp_data_values)

    def goal_sse(offset):
        # SSE between the experiment and the simulation shifted by offset
        sim_data_values_copy = spline(exp_time_values - offset)

        sse = numpy.sum((exp_data_values - sim_data_values_copy)**2)

        return sse

    result = scipy.optimize.minimize(goal_sse, dt, method='powell')

    # result.x is a 0-d ndarray for a scalar Powell problem; convert to float
    # for a clean scalar return, consistent with float(result.x) used elsewhere
    return float(result.x)

def time_function_decay(CV_time, peak_time, diff_input=False):
x_exp = numpy.array([0, 1.0*CV_time])
y_exp = numpy.array([1, 0.5])
Expand Down
43 changes: 34 additions & 9 deletions CADETMatch/scores/dextranSSE.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,16 @@ def run(sim_data, feature):

sim_time_values, sim_data_values = util.get_times_values(sim_data['simulation'], feature)

diff = feature['value'] - sim_data_values

if max(sim_data_values) < max_value: #the system has no point higher than the value we are looking for
#remove hard failure
max_value = max(sim_data_values)

exp_time_values = exp_time_values[selected]
exp_data_zero = feature['exp_data_zero']

min_index = numpy.argmax(sim_data_values >= 1e-3*max_value)
max_index = numpy.argmax(sim_data_values >= max_value)

sim_data_zero = numpy.zeros(len(sim_data_values))
sim_data_zero[min_index:max_index+1] = sim_data_values[min_index:max_index+1]
sim_data_values_smooth = smoothing.smooth_data(sim_time_values, sim_data_values,
feature['critical_frequency'], feature['smoothing_factor'])
sim_data_zero = cut_zero(sim_time_values, sim_data_values_smooth, 1e-3*max_value, max_value)

sse = util.sse(sim_data_zero, exp_data_zero)

Expand All @@ -63,8 +59,7 @@ def setup(sim, feature, selectedTimes, selectedValues, CV_time, abstol, cache):
min_time = selectedTimes[min_index]
min_value = smooth_value[min_index]

exp_data_zero = numpy.zeros(len(smooth_value))
exp_data_zero[min_index:max_index+1] = smooth_value[min_index:max_index+1]
exp_data_zero = cut_zero(selectedTimes, smooth_value, 1e-3*max_value, max_value)

multiprocessing.get_logger().info("Dextran %s start: %s stop: %s max value: %s", name,
min_time, max_time, max_value)
Expand All @@ -85,7 +80,37 @@ def headers(experimentName, feature):
temp = ["%s_SSE" % name,]
return temp

def cut_zero(times, values, min_value, max_value):
    """Cut the raw times and values as close to min_value and max_value as
    possible on the rising front and zero everything outside that window,
    without smoothing.

    Returns an array the same length as ``times`` that is zero everywhere
    except between the samples closest to ``min_value`` and ``max_value``.
    If the front never reaches ``max_value`` the data is returned unchanged.
    """
    values = numpy.asarray(values)
    data_zero = numpy.zeros(len(times))

    # the threshold-crossing index could be off by 1 in either direction
    offset = numpy.array([-1, 0, 1])

    # only search the rising front, i.e. up to the global peak
    peak_max_index = numpy.argmax(values)
    if peak_max_index == 0:
        # peak is the very first sample; there is no rising front to cut
        return values

    max_index = numpy.argmax(values[:peak_max_index] >= max_value)

    if not max_index:
        # if max_index is zero the whole front is below the value we are
        # searching for, so just return the whole array
        return values

    min_index = numpy.argmax(values[:max_index] >= min_value)

    # for each endpoint consider the neighbor samples and keep the one whose
    # value is closest to the target; clip keeps candidate indices in bounds
    # (a raw -1 would wrap around to the LAST element in numpy)
    last_index = len(values) - 1

    check_max_index = numpy.clip(max_index + offset, 0, last_index)
    check_max_diff = numpy.abs(values[check_max_index] - max_value)

    check_min_index = numpy.clip(min_index + offset, 0, last_index)
    check_min_diff = numpy.abs(values[check_min_index] - min_value)

    min_index = check_min_index[numpy.argmin(check_min_diff)]
    max_index = check_max_index[numpy.argmin(check_max_diff)]

    data_zero[min_index:max_index+1] = values[min_index:max_index+1]

    return data_zero



Expand Down
84 changes: 59 additions & 25 deletions CADETMatch/scores/dextranShape.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@ def run(sim_data, feature):
exp_time_zero = feature['exp_time_zero']
exp_data_zero = feature['exp_data_zero']

sim_data_zero = cut_front(sim_time_values, sim_data_values, exp_time_zero,
sim_spline, sim_data_zero_sse = cut_front(sim_time_values, sim_data_values,
feature['min_value_front'], feature['max_value_front'],
feature['smoothing_factor'], feature['critical_frequency'])
pearson, diff_time = score.pearson_spline(exp_time_zero, exp_data_zero, sim_data_zero)
feature['critical_frequency'], feature['smoothing_factor'])

pearson, diff_time = score.pearson_spline_fun(exp_time_zero, exp_data_zero, sim_spline)

exp_data_zero_sse = feature['exp_data_zero_sse']
sim_data_zero_sse = scipy.interpolate.InterpolatedUnivariateSpline(exp_time_zero, sim_data_zero, ext=1)(sim_time_values)

temp = [pearson,
feature['offsetTimeFunction'](numpy.abs(diff_time)),
Expand All @@ -44,13 +43,11 @@ def setup(sim, feature, selectedTimes, selectedValues, CV_time, abstol, cache):
temp = {}
#change the stop point to be where the max positive slope is along the searched interval
name = '%s_%s' % (sim.root.experiment_name, feature['name'])
exp_time_zero, exp_data_zero, min_time, min_value, max_time, max_value, s, crit_fs = cut_front_find(selectedTimes, selectedValues, name, cache)
exp_time_zero, exp_data_zero, exp_data_zero_sse, min_time, min_value, max_time, max_value, s, crit_fs = cut_front_find(selectedTimes, selectedValues, name, cache)

multiprocessing.get_logger().info("Dextran %s start: %s stop: %s max value: %s", name,
min_time, max_time, max_value)

exp_data_zero_sse = scipy.interpolate.InterpolatedUnivariateSpline(exp_time_zero, exp_data_zero, ext=1)(selectedTimes)

temp['min_time'] = feature['start']
temp['max_time'] = feature['stop']

Expand Down Expand Up @@ -95,11 +92,11 @@ def goal(time):
max_time = float(result.x)
max_value = spline(float(result.x))

min_index = numpy.argmax(smooth_value >= 1e-2*max_value)
min_index = numpy.argmax(smooth_value >= 1e-3*max_value)
min_time = times[min_index]

def goal(time):
return abs(spline(time)-1e-2*max_value)
return abs(spline(time)-1e-3*max_value)

result = scipy.optimize.minimize(goal, min_time, method='powell')

Expand All @@ -111,50 +108,87 @@ def goal(time):

new_times = numpy.linspace(times[0], times[-1], needed_points)
new_values = spline(new_times)

data_zero = cut_zero(new_times, new_values, min_value, max_value)

max_index = numpy.argmax(new_values >= max_value)
min_index = numpy.argmax(new_values >= min_value)
return (new_times, data_zero, cut_zero(times, smooth_value, min_value, max_value),
min_time, min_value, max_time, max_value, s, crit_fs)

data_zero = numpy.zeros(needed_points)
def cut_front(times, values, min_value, max_value, crit_fs, s):
max_index = numpy.argmax(values >= max_value)

data_zero[min_index:max_index+1] = new_values[min_index:max_index+1]
if max_index == 0:
#no point high enough was found so use the highest point
s, crit_fs = smoothing.find_smoothing_factors(times, values, None, None)
max_index = numpy.argmax(values)

return new_times, data_zero, min_time, min_value, max_time, max_value, s, crit_fs
max_time = times[max_index]
max_value = values[max_index]

def cut_front(times, values, new_times, min_value, max_value, s, crit_fs):
smooth_value = smoothing.smooth_data(times, values, crit_fs, s)

spline = scipy.interpolate.InterpolatedUnivariateSpline(times, smooth_value, ext=1)

max_index = numpy.argmax(values >= max_value)
max_time = times[max_index]

def goal(time):
return abs(spline(time)-max_value)

result = scipy.optimize.minimize(goal, max_time, method='powell')

max_time = float(result.x)
max_value = spline(float(result.x))
max_index = max(numpy.argmax(values >= max_value), max_index)

min_index = numpy.argmax(values >= min_value)
min_index = numpy.argmax(values[:max_index] >= min_value)
min_time = times[min_index]

def goal(time):
if time > max_time:
#don't search to the right of the max_time for a lower value
return 1e10
return abs(spline(time)-min_value)

result = scipy.optimize.minimize(goal, min_time, method='powell')

min_time = float(result.x)
min_value = spline(float(result.x))

new_values = spline(new_times)
needed_points = int( (max_time - min_time) * 10)
new_times_spline = numpy.linspace(min_time, max_time, needed_points)
new_values_spline = spline(new_times_spline)

max_index = numpy.argmax(new_values >= max_value)
min_index = numpy.argmax(new_values >= min_value)
spline = scipy.interpolate.InterpolatedUnivariateSpline(new_times_spline, new_values_spline, ext=1)

return spline, cut_zero(times, smooth_value, min_value, max_value)

def cut_zero(times, values, min_value, max_value):
"cut the raw times and values as close to min_value and max_value as possible and set the rest to zero without smoothing"
data_zero = numpy.zeros(len(times))

offset = numpy.array([-1, 0, 1])

#find the starting point for the min_index and max_index, the real value could be off by 1 in either direction
peak_max_index = numpy.argmax(values)

max_index = numpy.argmax(values[:peak_max_index] >= max_value)

data_zero = numpy.zeros(len(new_times))
data_zero[min_index:max_index+1] = new_values[min_index:max_index+1]
if not max_index:
#if max_index is zero the whole array is below the value we are searching for so just returnt the whole array
return values

min_index = numpy.argmax(values[:max_index] >= min_value)

check_max_index = max_index + offset
check_max_value = values[check_max_index]
check_max_diff = numpy.abs(check_max_value - max_value)

check_min_index = min_index + offset
check_min_value = values[check_min_index]
check_min_diff = numpy.abs(check_min_value - min_value)

min_index = min_index + offset[numpy.argmin(check_min_diff)]
max_index = max_index + offset[numpy.argmin(check_max_diff)]

data_zero[min_index:max_index+1] = values[min_index:max_index+1]

return data_zero

Expand Down
8 changes: 6 additions & 2 deletions CADETMatch/smoothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ def goal(crit_fs):

return crit_fs

def refine_smooth(times, values, x, y, start):
def refine_smooth(times, values, x, y, start, name):
if name is None:
name = 'unknown'

x = numpy.array(x)
y = numpy.array(y)

Expand Down Expand Up @@ -211,7 +214,8 @@ def find_smoothing_factors(times, values, name, cache):

s, s_knots = util.find_Left_L(all_s,knots)

s, s_knots = refine_smooth(times, values_filter, all_s, knots, s)
if s is not None:
s, s_knots = refine_smooth(times, values_filter, all_s, knots, s, name)

record_smoothing(s, s_knots, crit_fs, knots, all_s, name, cache)

Expand Down
6 changes: 3 additions & 3 deletions Examples/Example1/Dextran/NSGA3_dextran.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
"CADETPath": "C:/Users/kosh_000/cadet_build/CADET-dev/MS_SMKL_RELEASE/bin/cadet-cli.exe",
"baseDir": "C:/Users/kosh_000/Documents/Visual Studio 2017/Projects/CADETMatch/Examples/Example1/Dextran",
"tempDir": "L:/",
"resultsDir": "fit_nsga3_deap",
"resultsDir": "fit_nsga3_deap_sse",
"CSV": "dextran_NSGA3.csv",
"checkpointFile": "check",
"stopAverage": 1,
"stopBest": 1,
"gradCheck": 1.0,
"gradVector": 1,
"searchMethod": "NSGA3",
"searchMethod": "Multistart",
"mutationRate": 1.0,
"crossoverRate": 1.0,
"generations": 1000,
Expand Down Expand Up @@ -51,7 +51,7 @@
"features": [
{
"name": "Pulse",
"type": "DextranShape"
"type": "DextranSSE"
}
]
}
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="CADETMatch",
version="0.5.13",
version="0.5.16",
author="William Heymann",
author_email="[email protected]",
description="CADETMatch is a parameter estimation and error modeling library for CADET",
Expand Down

0 comments on commit d15e6fb

Please sign in to comment.