From db158b4e872e3797f7d6b1510805d5d13fb5949d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Fr=C3=B6hlich?= Date: Tue, 16 Nov 2021 13:50:30 -0500 Subject: [PATCH] Fides 0.7.1 (#44) * add hdf5 history writing * bump version, remove history init * simplify history * allow restriction of iterative update schemes * apply restriction to structured updates * expand steps * simplify & fix hdf5 export * allow disabling curvature condition * fix flake * fixes update restriction * Update test_hessian_approximation.py * Update requirements.txt * remove hess approx restriction * implement hybridfraction method * fixup * fix neg eigenvalue handling * fixup * remove scaled gradient step, fix handling negative EV * fixup * fixup * fix doc? * Update hessian_approximation.py * fix doc? * fixup * use eigenvalues from lstsq * fix: lstsq only returns absolute ev values * add more errors, less verbose default logging * fix eigvals * fix flake * refactor refine to only be performed in subspace * fix flake * fix test * fix flake * fix refine step * reduce logging output * allow user to set start_id * fix history tracking * fixup * bump version * fixup * Update version.py * fix normalization * track condition number * change tracked fval * only real part of EVs * fix curvature condition enforcement * rename posdef_newt * update fraction min iteration to 25 * track newton steps * use more copy to avoid shooting ourselves in the foot --- fides/hessian_approximation.py | 28 ++++----- fides/minimize.py | 103 +++++++++++++++++---------------- fides/steps.py | 28 ++++----- fides/version.py | 2 +- 4 files changed, 81 insertions(+), 80 deletions(-) diff --git a/fides/hessian_approximation.py b/fides/hessian_approximation.py index e2a6ecc..9ec082d 100644 --- a/fides/hessian_approximation.py +++ b/fides/hessian_approximation.py @@ -56,7 +56,7 @@ def get_mat(self) -> np.ndarray: :return: Hessian approximation """ - return self._hess + return self._hess.copy() def get_diff(self) -> np.ndarray: """ @@ -65,7 +65,7 @@ def get_diff(self) -> np.ndarray: :return: Hessian approximation update """ - return self._diff + return self._diff.copy() def set_mat(self, mat: np.ndarray) -> None: """ @@ -78,7 +78,7 @@ def set_mat(self, mat: np.ndarray) -> None: raise ValueError('Passed matrix had inconsistent ' f'shape, was {mat.shape}, ' f'but should be {self._hess.shape}.') - self._hess = mat + self._hess = mat.copy() @property def requires_resfun(self): @@ -88,6 +88,10 @@ def requires_resfun(self): def requires_hess(self): return False # pragma: no cover + def _update_hess_and_store_diff(self, hess): + self._diff = hess - self._hess + self._hess = hess.copy() + class IterativeHessianApproximation(HessianApproximation): """ @@ -144,11 +148,8 @@ def __init__(self, phi: float, init_with_hess: Optional[bool] = False, super(Broyden, self).__init__(init_with_hess) def _compute_update(self, s: np.ndarray, y: np.ndarray): - if y.T.dot(s) <= 0: - self._diff = np.zeros_like(self._hess) - else: - self._diff = broyden_class_update(y, s, self._hess, self.phi, - self.enforce_curv_cond) + self._diff = broyden_class_update(y, s, self._hess, self.phi, + self.enforce_curv_cond) class BFGS(Broyden): @@ -257,8 +258,7 @@ def _switched_update(self, s: np.ndarray, y: np.ndarray, new_hess = self.hessian_update.get_mat() else: new_hess = hess - self._diff = new_hess - self._hess - self._hess = new_hess + self._update_hess_and_store_diff(new_hess) class HybridFixed(HybridSwitchApproximation): @@ -297,7 +297,7 @@ def __init__(self, Switch from a dynamic approximation to the user provided iterative scheme as soon as the fraction of iterations where the step is accepted but the trust region is not update exceeds the user provided - threshold.Threshold check is only performed after 10 iterations. + threshold.Threshold check is only performed after 25 iterations. The switching is non-reversible. The iterative scheme is initialized and updated rom the beginning, but only employed after the switching. @@ -316,7 +316,7 @@ def __init__(self, def update(self, s: np.ndarray, y: np.ndarray, hess: np.ndarray, tr_nonupdates: int, iterations: int): self._switched_update(s, y, hess) - if not self._switched and iterations > 10: + if not self._switched and iterations > 25: self._switched = tr_nonupdates/iterations > self.switch_threshold @@ -440,10 +440,6 @@ def requires_hess(self): def get_structured_diff(self) -> np.ndarray: return self._structured_diff - def _update_hess_and_store_diff(self, hess): - self._diff = hess - self._hess - self._hess = hess - class SSM(StructuredApproximation): """ diff --git a/fides/minimize.py b/fides/minimize.py index 8d7cf0e..4c42847 100644 --- a/fides/minimize.py +++ b/fides/minimize.py @@ -300,7 +300,7 @@ def minimize(self, x0: np.ndarray, start_id: Optional[str] = None): self.hessian_update.init_mat(len(self.x), funout.hess) self.hess = self.hessian_update.get_mat() else: - self.hess = funout.hess + self.hess = funout.hess.copy() funout.checkdims() @@ -353,6 +353,9 @@ def minimize(self, x0: np.ndarray, start_id: Optional[str] = None): self.update(step, funout_new, funout) funout = funout_new + if self.get_option(Options.HISTORY_FILE): + self.track_history(accepted, step, funout_new) + if self.get_option(Options.HISTORY_FILE): with h5py.File(self.get_option(Options.HISTORY_FILE), 'a') as f: g = f.create_group(self.start_id) @@ -436,7 +439,7 @@ def update(self, self.hess = self.hessian_update.get_mat() else: - self.hess = funout_new.hess + self.hess = funout_new.hess.copy() self.check_in_bounds(funout_new.x) self.fval = funout_new.fval self.x = funout_new.x @@ -659,10 +662,6 @@ def log_step(self, accepted: bool, step: Step, funout: Funout): iterspaces = max(len(str(self.get_option(Options.MAXITER))), 5) - \ len(str(self.iteration)) steptypespaces = 4 - len(step.type) - reflspaces, trunspaces = [ - 4 - len(str(count)) - for count in [step.reflection_count, step.truncation_count] - ] fval = funout.fval if not np.isfinite(fval): @@ -680,52 +679,56 @@ def log_step(self, accepted: bool, step: Step, funout: Funout): f' | {int(accepted)}' ) - if self.get_option(Options.HISTORY_FILE): + def track_history(self, accepted: bool, step: Step, funout: Funout): + normdx = norm(step.s + step.s0) + normg = norm(self.grad) + + min_ev_hess, max_ev_hess = _min_max_evs(self.hess) + min_ev_hess_update, max_ev_hess_update = np.NaN, np.NaN + min_ev_hess_supdate, max_ev_hess_supdate = np.NaN, np.NaN + if self.hessian_update: + if accepted: + min_ev_hess_update, max_ev_hess_update = \ + _min_max_evs(self.hessian_update.get_diff()) + else: + min_ev_hess_update, max_ev_hess_update = 0.0, 0.0 - min_ev_hess, max_ev_hess = _min_max_evs(self.hess) - min_ev_hess_update, max_ev_hess_update = np.NaN, np.NaN - min_ev_hess_supdate, max_ev_hess_supdate = np.NaN, np.NaN - if self.hessian_update: + if isinstance(self.hessian_update, StructuredApproximation): if accepted: - min_ev_hess_update, max_ev_hess_update = \ - _min_max_evs(self.hessian_update.get_diff()) + min_ev_hess_supdate, max_ev_hess_supdate = \ + _min_max_evs( + self.hessian_update.get_structured_diff() + ) else: - min_ev_hess_update, max_ev_hess_update = 0.0, 0.0 - - if isinstance(self.hessian_update, StructuredApproximation): - if accepted: - min_ev_hess_supdate, max_ev_hess_supdate = \ - _min_max_evs( - self.hessian_update.get_structured_diff() - ) - else: - min_ev_hess_supdate, max_ev_hess_supdate = 0.0, 0.0 - - update = { - 'fval': self.fval, - 'tr_ratio': self.tr_ratio, - 'tr_radius': self.delta_iter, - 'normgrad': normg, - 'normstep': normdx, - 'theta': step.theta, - 'alpha': step.alpha, - 'reflections': step.reflection_count, - 'truncations': step.truncation_count, - 'accept': accepted, - 'hess_min_ev': min_ev_hess, - 'hess_max_ev': max_ev_hess, - 'hess_update_min_ev': min_ev_hess_update, - 'hess_update_max_ev': max_ev_hess_update, - 'hess_struct_update_min_ev': min_ev_hess_supdate, - 'hess_struct_update_max_ev': max_ev_hess_supdate, - 'iterations_since_tr_update': self.iterations_since_tr_update, - 'step_type': step.type, - 'subspace_dim': step.subspace.shape[1], - 'posdef_newt': step.posdef_newt - if hasattr(step, 'posdef_newt') else False, - } - for key, val in update.items(): - self.history[key].append(val) + min_ev_hess_supdate, max_ev_hess_supdate = 0.0, 0.0 + + update = { + 'fval': funout.fval, + 'tr_ratio': self.tr_ratio, + 'tr_radius': self.delta_iter, + 'normgrad': normg, + 'normstep': normdx, + 'theta': step.theta, + 'alpha': step.alpha, + 'reflections': step.reflection_count, + 'truncations': step.truncation_count, + 'accept': accepted, + 'hess_min_ev': min_ev_hess, + 'hess_max_ev': max_ev_hess, + 'hess_update_min_ev': min_ev_hess_update, + 'hess_update_max_ev': max_ev_hess_update, + 'hess_struct_update_min_ev': min_ev_hess_supdate, + 'hess_struct_update_max_ev': max_ev_hess_supdate, + 'iterations_since_tr_update': self.iterations_since_tr_update, + 'step_type': step.type, + 'subspace_dim': step.subspace.shape[1], + 'posdef': step.posdef if hasattr(step, 'posdef') else False, + 'newton': step.newton if hasattr(step, 'newton') else False, + 'cond_hess': np.linalg.cond(self.hess), + 'cond_shess': np.linalg.cond(step.shess), + } + for key, val in update.items(): + self.history[key].append(val) def log_step_initial(self): """ @@ -842,4 +845,4 @@ def get_option(self, option): def _min_max_evs(mat: np.ndarray): evs = np.linalg.eigvals(mat) - return np.min(evs), np.max(evs) + return np.real(np.min(evs)), np.real(np.max(evs)) diff --git a/fides/steps.py b/fides/steps.py index 36ab12f..cf5e665 100644 --- a/fides/steps.py +++ b/fides/steps.py @@ -117,8 +117,8 @@ def __init__(self, self.og_sc: Union[np.ndarray, None] = None self.og_ss: Union[np.ndarray, None] = None - self.sg: np.ndarray = sg - self.scaling: csc_matrix = scaling + self.sg: np.ndarray = sg.copy() + self.scaling: csc_matrix = scaling.copy() self.delta: float = delta self.theta: float = theta @@ -265,23 +265,25 @@ def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb, logger): super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb, logger) - s_newt, _, _, _ = linalg.lstsq(self.shess, sg) - s_newt *= -1 + s_newt = - linalg.lstsq(self.shess, sg)[0] # lstsq only returns absolute ev values e, v = np.linalg.eig(self.shess) - self.posdef_newt = np.min(np.real(e)) > \ - - np.spacing(1) * np.max(np.abs(e)) + self.posdef = np.min(np.real(e)) > - np.spacing(1) * np.max(np.abs(e)) if len(sg) == 1: s_newt = - sg[0]/self.shess[0] self.subspace = np.expand_dims(s_newt, 1) return - if self.posdef_newt: + self.newton = False + + if self.posdef: s_newt = - linalg.lstsq(self.shess, sg)[0] if norm(s_newt) < delta: # Case 0 of Fig 12 in [ColemanLi1994] + normalize(s_newt) + self.newton = True self.subspace = np.expand_dims(s_newt, 1) return @@ -376,8 +378,8 @@ def conj_grad(self, eps): beta = rp2 / r2 d = -rp + beta * d - z = zp - r = rp + z = zp.copy() + r = rp.copy() class TRStepReflected(Step): @@ -479,9 +481,9 @@ def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb, step): super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb, step.logger) - self.subspace: np.ndarray = step.subspace - self.chess: np.ndarray = step.chess - self.cg: np.ndarray = step.cg + self.subspace: np.ndarray = step.subspace.copy() + self.chess: np.ndarray = step.chess.copy() + self.cg: np.ndarray = step.cg.copy() self.constraints = [ NonlinearConstraint( fun=lambda xc: (norm(self.subspace.dot(xc)) - delta) * @@ -498,7 +500,7 @@ def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta, ub=(ub - x) / scaling.diagonal() ) ] - self.guess: np.ndarray = step.sc + self.guess: np.ndarray = step.sc.copy() self.qpval0: float = step.qpval self.reflection_indices: int = step.reflection_indices self.truncation_indices: int = step.truncation_indices diff --git a/fides/version.py b/fides/version.py index 49e0fc1..a5f830a 100644 --- a/fides/version.py +++ b/fides/version.py @@ -1 +1 @@ -__version__ = "0.7.0" +__version__ = "0.7.1"