diff --git a/.cirrus.yml b/.cirrus.yml
index 23306f8..73e6074 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -1,15 +1,20 @@
 env:
   CIRRUS_CLONE_SUBMODULES: true
-
+# quay.io/pypa/manylinux2014_x86_64    # CentOS 7 (use GCC 10)
+# quay.io/pypa/manylinux_2_24_x86_64   # Debian (unknown)
+# quay.io/pypa/manylinux_2_28_x86_64   # AlmaLinux (use clang)
 linux_task:
   container:
-    image: quay.io/pypa/manylinux_2_28_x86_64
+    # image: quay.io/pypa/manylinux_2_28_x86_64
+    image: quay.io/pypa/manylinux2014_x86_64
   only_if: changesInclude('.cirrus.yml', '**.{h,cpp,py}')
   env:
-    CC: clang
-    CXX: clang++
-    PATH: ${PATH}:/opt/python/cp310-cp310/bin
+    # CC: clang
+    # CXX: clang++
+    CC: gcc
+    CXX: g++
+    PATH: /opt/python/cp310-cp310/bin:${PATH}
   pip_cache:
     folder: ~/.cache/pip
     fingerprint_script: echo $PYTHON_VERSION
diff --git a/pyproject.toml b/pyproject.toml
index 801bfe6..9ba1d92 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,11 +44,15 @@ test-requires = ["pytest", "pytest-cov", "pytest-benchmark", "bokeh"] # coverage
 test-command = "python -m pytest {package}/tests/ --cov={package} --benchmark-skip"
 build-verbosity = 1
 skip = "cp36-* pp* cp37-* *_ppc64le *_i686 *_s390x *-musllinux*" # todo: revisit musllinux
-manylinux-x86_64-image = "manylinux_2_28" # prefer the newer one
+# manylinux-x86_64-image = "manylinux_2_28" # prefer the newer one
+manylinux-x86_64-image = "manylinux2014"
+
 [tool.cibuildwheel.linux]
 before-build = "bash {project}/tools/cibw_linux.sh {project}"
-environment = { CC="clang", CXX="clang++" }
+environment = { CC="gcc", CXX="g++" }           # for manylinux2014
+# environment = { CC="clang", CXX="clang++" }   # for manylinux_2_28
+
 # before-build = ["ulimit -n 4096", "yum install -y clang", "yum install -y openblas"]
 
 [tool.cibuildwheel.macos]
diff --git a/src/primate/_trace.cpp b/src/primate/_trace.cpp
index c4842d8..7f27f55 100644
--- a/src/primate/_trace.cpp
+++ b/src/primate/_trace.cpp
@@ -14,10 +14,12 @@ using namespace pybind11::literals;
 template< typename F >
 using py_array = py::array_t< F, py::array::f_style | py::array::forcecast >;
 
+
 // Template function for generating module definitions for a given Operator / precision
 template< bool multithreaded, std::floating_point F, class Matrix, LinearOperator Wrapper >
 void _trace_wrapper(py::module& m){
   using ArrayF = Eigen::Array< F, Dynamic, 1 >;
+  using VectorF = Eigen::Matrix< F, Dynamic, 1 >;
   m.def("hutch", [](
     const Matrix& A,
@@ -60,6 +62,21 @@ void _trace_wrapper(py::module& m){
       "matvec_time_us"_a = op.matvec_time
     );
   });
+
+  // Computes the trace of Q.T @ (A @ Q) including the inner terms q_i^T A q_i
+  m.def("quad_sum", [](const Matrix& A, DenseMatrix< F > Q) -> py_array< F > {
+    const auto op = Wrapper(A);
+    F quad_sum = 0.0;
+    const size_t N = static_cast< size_t >(Q.cols());
+    auto estimates = static_cast< ArrayF >(ArrayF::Zero(N));
+    auto y = static_cast< VectorF >(VectorF::Zero(Q.rows()));
+    for (size_t j = 0; j < N; ++j){
+      op.matvec(Q.col(j).data(), y.data());
+      estimates[j] = Q.col(j).adjoint().dot(y);
+      quad_sum += estimates[j];
+    }
+    return py::cast(estimates);
+  });
 }
 
 // const Matrix& A, std::function< F(F) > fun, int lanczos_degree, F lanczos_rtol, int _orth, int _ncv
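For readers skimming the C++ above: the new `quad_sum` binding simply evaluates the per-column quadratic forms q_j^T A q_j and returns them, so their sum equals tr(Q^T A Q). A rough NumPy equivalent is sketched below purely for illustration; it is not part of the patch and `quad_sum_reference` is a made-up name.

import numpy as np

def quad_sum_reference(A: np.ndarray, Q: np.ndarray) -> np.ndarray:
    # q_j^T (A q_j) for every column j of Q; np.sum of the result equals np.trace(Q.T @ A @ Q)
    return np.einsum("ij,ij->j", Q, A @ Q)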
diff --git a/src/primate/functional.py b/src/primate/functional.py
index b8fd081..3164288 100644
--- a/src/primate/functional.py
+++ b/src/primate/functional.py
@@ -10,6 +10,7 @@ from .diagonalize import lanczos
 
 ## Since Python has no support for inline creation of sized generators
+## Based on "Estimating the Largest Eigenvalue by the Power and Lanczos Algorithms with a Random Start"
 class RelativeErrorBound():
   def __init__(self, n: int):
     self.base_num = 2.575 * np.log(n)
diff --git a/src/primate/meson.build b/src/primate/meson.build
index 144eb99..6fb3283 100644
--- a/src/primate/meson.build
+++ b/src/primate/meson.build
@@ -27,6 +27,7 @@ python_sources = [
   'stats.py',
   'special.py',
   'functional.py',
+  'quadrature.py',
   '__init__.py'
 ]
diff --git a/src/primate/operator.py b/src/primate/operator.py
index 02e0a29..7eba55d 100644
--- a/src/primate/operator.py
+++ b/src/primate/operator.py
@@ -114,4 +114,20 @@ def _matvec(self, x: np.ndarray) -> np.ndarray:
     self._z[:len(x)] = x
     x_fft = np.fft.fft(self._z)
     y = np.real(np.fft.ifft(self._dfft * x_fft))
-    return y[:len(x)]
\ No newline at end of file
+    return y[:len(x)]
+
+
+## For use with e.g. Hutch++
+class OrthComplement(LinearOperator):
+    def __init__(self, A: Union[LinearOperator, np.ndarray], Q: np.ndarray):
+        self.A = A
+        self.Q = Q
+        self.shape = A.shape
+        self.dtype = self.Q.dtype
+
+    def _deflate(self, w: np.ndarray) -> np.ndarray:
+        return w - self.Q @ (self.Q.T @ w)
+
+    def _matvec(self, x: np.ndarray) -> np.ndarray:
+        y = self._deflate(x)
+        return self._deflate(self.A @ y)
\ No newline at end of file
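A short aside on why `OrthComplement` is useful for Hutch++ (not part of the patch): when Q has orthonormal columns, the trace splits exactly as tr(A) = tr(Q^T A Q) + tr((I - QQ^T) A (I - QQ^T)), because the cross terms vanish for the projector QQ^T. The deflated operator can therefore be handed to a plain Hutchinson estimator to recover only the residual term. A minimal self-contained check of that identity:

import numpy as np
rng = np.random.default_rng(0)
A = rng.normal(size=(50, 50)); A = (A + A.T) / 2      # any symmetric matrix
Q = np.linalg.qr(rng.normal(size=(50, 10)))[0]        # orthonormal basis, 10 columns
P = np.eye(50) - Q @ Q.T                              # projector onto the orthogonal complement
assert np.isclose(np.trace(A), np.trace(Q.T @ A @ Q) + np.trace(P @ A @ P))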
diff --git a/src/primate/quadrature.py b/src/primate/quadrature.py
new file mode 100644
index 0000000..aff1100
--- /dev/null
+++ b/src/primate/quadrature.py
@@ -0,0 +1,98 @@
+from typing import Union, Callable, Any
+from numbers import Integral
+import numpy as np
+from scipy.sparse.linalg import LinearOperator
+from scipy.linalg import solve_triangular
+from scipy.stats import t
+from scipy.special import erfinv
+from numbers import Real
+
+## Package imports
+from .random import _engine_prefixes, _engines, isotropic
+from .special import _builtin_matrix_functions
+from .operator import matrix_function
+import _lanczos
+import _trace
+import _orthogonalize
+
+
+def sl_gauss(
+    A: Union[LinearOperator, np.ndarray],
+    n: int = 150,
+    deg: int = 20,
+    pdf: str = "rademacher",
+    rng: str = "pcg",
+    seed: int = -1,
+    orth: int = 0,
+    num_threads: int = 0,
+) -> np.ndarray:
+    """Stochastic Gaussian quadrature approximation.
+
+    Computes a set of sample nodes and weights for the degree-k orthogonal polynomial approximating the
+    cumulative spectral measure of `A`. This function can be used to approximate the spectral density of `A`,
+    or to approximate the spectral sum of any function applied to the spectrum of `A`.
+
+    Parameters
+    ----------
+    A : ndarray, sparray, or LinearOperator
+        real symmetric operator.
+    n : int, default=150
+        Number of random vectors to sample for the quadrature estimate.
+    deg : int, default=20
+        Degree of the quadrature approximation.
+    rng : { 'splitmix64', 'xoshiro256**', 'pcg64', 'lcg64', 'mt64' }, default="pcg64"
+        Random number generator to use (PCG64 by default).
+    seed : int, default=-1
+        Seed to initialize the `rng` entropy source. Set `seed` > -1 for reproducibility.
+    pdf : { 'rademacher', 'normal' }, default="rademacher"
+        Choice of zero-centered distribution to sample random vectors from.
+    orth: int, default=0
+        Number of additional Lanczos vectors to orthogonalize against when building the Krylov basis.
+    num_threads: int, default=0
+        Number of threads to use to parallelize the computation. Set `num_threads` < 1 to let OpenMP decide.
+
+    Returns
+    -------
+    quad_nw : ndarray
+        Array of quadrature nodes and corresponding weights approximating the spectral measure of `A`.
+
+    """
+    attr_checks = [hasattr(A, "__matmul__"), hasattr(A, "matmul"), hasattr(A, "dot"), hasattr(A, "matvec")]
+    assert any(attr_checks), "Invalid operator; must have an overloaded 'matvec' or 'matmul' method"
+    assert hasattr(A, "shape") and len(A.shape) >= 2, "Operator must be at least two dimensional."
+    assert A.shape[0] == A.shape[1], "This function only works with square, symmetric matrices!"
+
+    ## Choose the random number engine
+    assert rng in _engine_prefixes or rng in _engines, f"Invalid pseudo random number engine supplied '{str(rng)}'"
+    engine_id = _engine_prefixes.index(rng) if rng in _engine_prefixes else _engines.index(rng)
+
+    ## Choose the distribution to sample random vectors from
+    assert pdf in ["rademacher", "normal"], f"Invalid distribution '{pdf}'; Must be one of 'rademacher' or 'normal'."
+    distr_id = ["rademacher", "normal"].index(pdf)
+
+    ## Get the dtype; infer it if it's not available
+    f_dtype = (A @ np.zeros(A.shape[1])).dtype if not hasattr(A, "dtype") else A.dtype
+    assert (
+        f_dtype.type == np.float32 or f_dtype.type == np.float64
+    ), "Only 32- or 64-bit floating point numbers are supported."
+
+    ## Extract the machine precision for the given floating point type
+    lanczos_rtol = np.finfo(f_dtype).eps  # if lanczos_rtol is None else f_dtype.type(lanczos_rtol)
+
+    ## Argument checking
+    m = A.shape[1]                                  # Dimension of the space
+    nv = int(n)                                     # Number of random vectors to generate
+    seed = int(seed)                                # Seed should be an integer
+    deg = max(deg, 2)                               # Must be at least 2
+    orth = m - 1 if orth < 0 else min(m - 1, orth)  # Number of additional vectors should be an integer
+    ncv = max(int(deg + orth), m)                   # Number of Lanczos vectors to keep in memory
+    num_threads = int(num_threads)                  # should be integer; if <= 0 will trigger max threads on C++ side
+
+    ## Collect the arguments processed so far
+    sl_quad_args = (nv, distr_id, engine_id, seed, deg, lanczos_rtol, orth, ncv, num_threads)
+
+    ## Make the actual call
+    quad_nw = _lanczos.stochastic_quadrature(A, *sl_quad_args)
+    return quad_nw
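A hedged usage sketch for the relocated `sl_gauss` (not from the patch): assuming the returned array stacks (node, weight) pairs over all `n` sampled vectors — the exact layout should be verified against the package docs — a spectral sum tr(f(A)) can be approximated by averaging the per-sample quadrature rules. `spectral_sum` below is an illustrative helper name, not a package function.

import numpy as np
from primate.quadrature import sl_gauss

def spectral_sum(A, f, n: int = 150, deg: int = 20) -> float:
    nodes, weights = sl_gauss(A, n=n, deg=deg).T    # (node, weight) layout assumed; verify before relying on it
    return (A.shape[0] / n) * np.sum(weights * f(nodes))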
diff --git a/src/primate/trace.py b/src/primate/trace.py
index a5850d6..ee81a3d 100644
--- a/src/primate/trace.py
+++ b/src/primate/trace.py
@@ -1,4 +1,4 @@
-from typing import Union, Callable
+from typing import Union, Callable, Any
 from numbers import Integral
 import numpy as np
 from scipy.sparse.linalg import LinearOperator
@@ -10,15 +10,40 @@
 ## Package imports
 from .random import _engine_prefixes, _engines, isotropic
 from .special import _builtin_matrix_functions
-from .operator import matrix_function
+from .operator import matrix_function, OrthComplement
 import _lanczos
 import _trace
+import _orthogonalize
 
 _default_tvals = t.ppf((0.95 + 1.0) / 2.0, df=np.arange(30)+1)
 
 # from collections import namedtuple
 # HutchParams = namedtuple('HutchParams', ['a', 'b'])
 
+def _operator_checks(A: Any) -> None:
+    attr_checks = [hasattr(A, "__matmul__"), hasattr(A, "matmul"), hasattr(A, "dot"), hasattr(A, "matvec")]
+    assert any(attr_checks), "Invalid operator; must have an overloaded 'matvec' or 'matmul' method"
+    assert hasattr(A, "shape") and len(A.shape) >= 2, "Operator must be at least two dimensional."
+    assert A.shape[0] == A.shape[1], "This function only works with square, symmetric matrices!"
+    assert hasattr(A, "shape"), "Operator 'A' must have a valid 'shape' attribute!"
+
+def _estimator_msg(info) -> str:
+    msg = f"{info['estimator']} estimator"
+    msg += f" (fun={info.get('function', None)}"
+    if info.get('lanczos_kwargs', None) is not None:
+        msg += f", deg={info['lanczos_kwargs'].get('deg', 20)}"
+    if info.get('quad', None) is not None:
+        msg += f", quad={info['quad']}"
+    msg += ")\n"
+    msg += f"Est: {info['estimate']:.3f}"
+    if 'margin_of_error' in info:
+        moe, conf, cv = (info[k] for k in ['margin_of_error', 'confidence', 'coeff_var'])
+        msg += f" +/- {moe:.3f} ({conf*100:.0f}% CI | {(cv*100):.0f}% CV)"
+    msg += f", (#S:{ info['n_samples'] } | #MV:{ info['n_matvecs']}) [{info['pdf'][0].upper()}]"
+    if info.get('seed', -1) != -1:
+        msg += f" (seed: {info['seed']})"
+    return msg
+
 def hutch(
     A: Union[LinearOperator, np.ndarray],
     fun: Union[str, Callable] = None,
@@ -51,7 +76,7 @@ def hutch(
    :::{.callout-note}
    Convergence behavior is controlled by the `stop` parameter: "confidence" uses the central limit theorem to generate confidence
    intervals on the fly, which may be used in conjunction with `atol` and `rtol` to upper-bound the error of the approximation.
-   Alternatively, when `stop` = "change", the estimator is considered converged when the error between the last two iterates is less than
+   Alternatively, when `stop` = 'change', the estimator is considered converged when the error between the last two iterates is less than
    `atol` (or `rtol`, respectively), similar to the behavior of scipy.integrate.quadrature.
    :::
@@ -101,10 +126,11 @@ def hutch(
 
    Notes
    -----
-   To compute the weights of the quadrature, `quad` can be set to either 'golub_welsch' or 'fttr'. The former (GW) uses implicit symmetric QR steps with Wilkinson shifts,
-   while the latter (FTTR) uses the explicit expression for orthogonal polynomials. While both require $O(\\mathrm{deg}^2)$ time to execute,
-   the former requires $O(\\mathrm{deg}^2)$ space but is highly accurate, while the latter uses only $O(1)$ space at the cost of stability.
-   If `deg` is large, `fttr` is preferred.
+   To compute the weights of the quadrature, `quad` can be set to either 'golub_welsch' or 'fttr'. The former uses implicit symmetric QR
+   steps with Wilkinson shifts, while the latter (FTTR) uses the explicit expression for orthogonal polynomials. While both require
+   $O(\\mathrm{deg}^2)$ time to execute, the former requires $O(\\mathrm{deg}^2)$ space but is highly accurate, while the latter uses
+   only $O(1)$ space at the cost of stability. If `deg` is large, `fttr` is preferred for performance, though pilot testing should be
+   done to ensure that instability does not cause bias in the approximation.
 
    See Also
    --------
@@ -115,13 +141,10 @@ def hutch(
    1. Ubaru, S., Chen, J., & Saad, Y. (2017). Fast estimation of tr(f(A)) via stochastic Lanczos quadrature. SIAM Journal on Matrix Analysis and Applications, 38(4), 1075-1099.
    2. Hutchinson, Michael F. "A stochastic estimator of the trace of the influence matrix for Laplacian smoothing splines." Communications in Statistics-Simulation and Computation 18.3 (1989): 1059-1076.
    """
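To make the 'confidence' stopping rule described in the docstring above concrete: the check is essentially a CLT margin-of-error test on the running sample mean. The sketch below illustrates that criterion only; it is not the library's internal code (which, per the `info_dict` fields added further down, also switches to t-quantiles when fewer than 30 samples are available).

import numpy as np
from scipy.stats import norm

def converged(samples: np.ndarray, confidence: float = 0.95, atol: float = 0.1) -> bool:
    z = norm.ppf((1.0 + confidence) / 2.0)                 # two-sided normal quantile
    std_err = np.std(samples, ddof=1) / np.sqrt(len(samples))
    return z * std_err <= atol                             # half-width of the CI vs. the absolute tolerance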
-    attr_checks = [hasattr(A, "__matmul__"), hasattr(A, "matmul"), hasattr(A, "dot"), hasattr(A, "matvec")]
-    assert any(attr_checks), "Invalid operator; must have an overloaded 'matvec' or 'matmul' method"
-    assert hasattr(A, "shape") and len(A.shape) >= 2, "Operator must be at least two dimensional."
-    assert A.shape[0] == A.shape[1], "This function only works with square, symmetric matrices!"
-
+    ## Quick + basic input validation checks
+    _operator_checks(A)
+
     ## Catch degenerate cases
-    assert hasattr(A, "shape"), "Operator 'A' must have a valid 'shape' attribute!"
     if (np.prod(A.shape) == 0) or (np.sum(A.shape) == 0):
         return 0
 
@@ -190,33 +213,19 @@
     ## Make the actual call
     info_dict = _trace.hutch(A, *hutch_args, **kwargs)
 
-    ## Print the status if requested
-    if verbose:
-        msg = f"Girard-Hutchinson estimator (fun={kwargs['function']}, deg={deg}, quad={quad})\n"
-        valid_samples = info_dict['samples'] != 0
-        n_valid = sum(valid_samples)
-        std_error = np.std(info_dict['samples'][valid_samples], ddof=1) / np.sqrt(n_valid)
-        z = np.sqrt(2.0) * erfinv(confidence)
-        cv = np.abs(std_error / info_dict['estimate'])
-        msg += f"Est: {info_dict['estimate']:.3f} +/- {z * std_error:.2f} ({confidence*100:.0f}% CI), CV: {(cv*100):.0f}%, "
-        msg += f"Evals: { n_valid } [{pdf[0].upper()}]"
-        if seed != -1:
-            msg += f" (seed: {seed})"
-        print(msg)
-
-    ## If only the point-estimate is required, return it
-    if not info and not plot:
+    ## Return as early as possible if no additional info requested for speed
+    if not verbose and not info and not plot:
         return info_dict["estimate"]
-
-    ## Otherwise build the info
-    if plot:
-        from bokeh.plotting import show
-        from .plotting import figure_trace
-        p = figure_trace(info_dict["samples"])
-        show(p)
-        info_dict['figure'] = figure_trace(info_dict["samples"])
-
-    ## Build the info dictionary
+
+    ## Post-process info dict
+    info_dict['estimator'] = "Girard-Hutchinson"
+    info_dict['valid'] = info_dict['samples'] != 0
+    info_dict['n_samples'] = np.sum(info_dict['valid'])
+    info_dict['n_matvecs'] = info_dict['n_samples'] * deg
+    info_dict['std_error'] = np.std(info_dict['samples'][info_dict['valid']], ddof=1) / np.sqrt(info_dict['n_samples'])
+    info_dict['coeff_var'] = np.abs(info_dict['std_error'] / info_dict['estimate'])
+    info_dict['margin_of_error'] = (t_values[info_dict['n_samples']] if info_dict['n_samples'] < 30 else z) * info_dict['std_error']
+    info_dict['confidence'] = confidence
     info_dict["stop"] = stop
     info_dict["pdf"] = pdf
     info_dict["rng"] = _engines[engine_id]
@@ -228,94 +237,104 @@
     info_dict["atol"] = atol
     info_dict["num_threads"] = "auto" if num_threads == 0 else num_threads
     info_dict["maxiter"] = nv
-    info_dict["confidence"] = confidence
-    return info_dict["estimate"], info_dict
-
-# TODO: implement hutch++
-# def hutchpp():
-
-def sl_gauss(
-    A: Union[LinearOperator, np.ndarray],
-    n: int = 150,
-    deg: int = 20,
-    pdf: str = "rademacher",
-    rng: str = "pcg",
-    seed: int = -1,
-    orth: int = 0,
-    num_threads: int = 0,
-) -> np.ndarray:
-    """Stochastic Gaussian quadrature approximation.
+    ## Print the status if requested
+    if verbose:
+        print(_estimator_msg(info_dict))
 
-    Computes a set of sample nodes and weights for the degree-k orthogonal polynomial approximating the
-    cumulative spectral measure of `A`. This function can be used to approximate the spectral density of `A`,
-    or to approximate the spectral sum of any function applied to the spectrum of `A`.
+    ## Plot samples if requested
+    if plot:
+        from bokeh.plotting import show
+        from .plotting import figure_trace
+        p = figure_trace(info_dict["samples"])
+        show(p)
+        info_dict['figure'] = figure_trace(info_dict["samples"])
 
-    Parameters
-    ----------
-    A : ndarray, sparray, or LinearOperator
-        real symmetric operator.
-    n : int, default=150
-        Number of random vectors to sample for the quadrature estimate.
-    deg : int, default=20
-        Degree of the quadrature approximation.
-    rng : { 'splitmix64', 'xoshiro256**', 'pcg64', 'lcg64', 'mt64' }, default="pcg64"
-        Random number generator to use (PCG64 by default).
-    seed : int, default=-1
-        Seed to initialize the `rng` entropy source. Set `seed` > -1 for reproducibility.
-    pdf : { 'rademacher', 'normal' }, default="rademacher"
-        Choice of zero-centered distribution to sample random vectors from.
-    orth: int, default=0
-        Number of additional Lanczos vectors to orthogonalize against when building the Krylov basis.
-    num_threads: int, default=0
-        Number of threads to use to parallelize the computation. Setting `num_threads` < 1 to let OpenMP decide.
+    ## Final return
+    return (info_dict["estimate"], info_dict) if info else info_dict["estimate"]
 
-    Returns
-    -------
-    trace_estimate : float
-        Estimate of the trace of the matrix function $f(A)$.
-    info : dict, optional
-        If 'info = True', additional information about the computation.
+def hutchpp(
+    A: Union[LinearOperator, np.ndarray],
+    fun: Union[str, Callable] = None,
+    b: int = "auto",
+    maxiter: int = 200,
+    mode: str = 'reduced',
+    **kwargs
+) -> Union[float, dict]:
+    """Hutch++ estimator.
 
     """
-    attr_checks = [hasattr(A, "__matmul__"), hasattr(A, "matmul"), hasattr(A, "dot"), hasattr(A, "matvec")]
-    assert any(attr_checks), "Invalid operator; must have an overloaded 'matvec' or 'matmul' method"
-    assert hasattr(A, "shape") and len(A.shape) >= 2, "Operator must be at least two dimensional."
-    assert A.shape[0] == A.shape[1], "This function only works with square, symmetric matrices!"
-
-    ## Choose the random number engine
-    assert rng in _engine_prefixes or rng in _engines, f"Invalid pseudo random number engine supplied '{str(rng)}'"
-    engine_id = _engine_prefixes.index(rng) if rng in _engine_prefixes else _engines.index(rng)
+    _operator_checks(A)
+    if (np.prod(A.shape) == 0) or (np.sum(A.shape) == 0):
+        ## Catch degenerate cases
+        return 0
 
-    ## Choose the distribution to sample random vectors from
-    assert pdf in ["rademacher", "normal"], f"Invalid distribution '{pdf}'; Must be one of 'rademacher' or 'normal'."
-    distr_id = ["rademacher", "normal"].index(pdf)
+    ## If fun is specified, transparently convert A to matrix function
+    if isinstance(fun, str):
+        assert fun in _builtin_matrix_functions, "If given as a string, matrix_function must be one of the builtin functions."
+        A = matrix_function(A, fun=fun)
+    elif isinstance(fun, Callable):
+        A = matrix_function(A, fun=fun)
+    elif fun is not None:
+        raise ValueError(f"Invalid matrix function type '{type(fun)}'")
 
-    ## Get the dtype; infer it if it's not available
+    ## Setup constants
+    verbose, info = kwargs.get('verbose', False), kwargs.get('info', False)
+    N: int = A.shape[0]
+    b = (N // 3) if b == "auto" else b                # main samples
+    m = (N // 3) if maxiter == "auto" else maxiter    # residual samples
+    # assert m % 3 == 0, "Number of sample vectors 'm' must be divisible by 3."
     f_dtype = (A @ np.zeros(A.shape[1])).dtype if not hasattr(A, "dtype") else A.dtype
-    assert (
-        f_dtype.type == np.float32 or f_dtype.type == np.float64
-    ), "Only 32- or 64-bit floating point numbers are supported."
-
-    ## Extract the machine precision for the given floating point type
-    lanczos_rtol = np.finfo(f_dtype).eps  # if lanczos_rtol is None else f_dtype.type(lanczos_rtol)
-
-    ## Argument checking
-    m = A.shape[1]                                  # Dimension of the space
-    nv = int(n)                                     # Number of random vectors to generate
-    seed = int(seed)                                # Seed should be an integer
-    deg = max(deg, 2)                               # Must be at least 2
-    orth = m - 1 if orth < 0 else min(m - 1, orth)  # Number of additional vectors should be an integer
-    ncv = max(int(deg + orth), m)                   # Number of Lanczos vectors to keep in memory
-    num_threads = int(num_threads)                  # should be integer; if <= 0 will trigger max threads on C++ side
-
-    ## Collect the arguments processed so far
-    sl_quad_args = (nv, distr_id, engine_id, seed, deg, lanczos_rtol, orth, ncv, num_threads)
-
-    ## Make the actual call
-    quad_nw = _lanczos.stochastic_quadrature(A, *sl_quad_args)
-    return quad_nw
-
+    info_dict = {}
+
+    ## Sketch Y / Q - use numpy for now, but consider parallelizing MGS later
+    WB = np.random.choice([-1.0, +1.0], size=(N, b)).astype(f_dtype)
+    Q = np.linalg.qr(A @ WB)[0]
+    # Y = np.array(A @ W2, order='F')
+    # assert Y.flags['F_CONTIGUOUS'] and Y.flags['OWNDATA'] and Y.flags['WRITEABLE']
+    # _orthogonalize.mgs(Y, 0)
+    # Q = Y # Q is mostly orthonormal
+
+    ## Estimate trace of the low-rank approx. / sketch
+    bulk_tr = 0.0
+    if mode == 'full':
+        bulk_tr = (Q.T @ (A @ Q)).trace()
+    else:
+        bulk_tr = np.sum(_trace.quad_sum(A, Q))
+
+    ## Estimate trace of the residual
+    residual_tr = 0.0
+    if mode == 'full':
+        ## Full mode == form the full (m x m) matrix and take its trace
+        ## Not memory efficient, but vectorized, so suitable for relatively small m
+        WM = np.random.choice([-1.0, +1.0], size=(N, m)).astype(f_dtype)
+        G = WM - Q @ (Q.T @ WM)
+        residual_tr += (1 / m) * (G.T @ (A @ G)).trace()
+    else:
+        ## reduced mode == Switch to plain Hutch estimator on orthogonal complement projector
+        ## Low memory footprint but might be 5x slower or worse on small inputs
+        PC = OrthComplement(A, Q)  # evaluates (I - QQ^T) A (I - QQ^T)
+        kwargs['maxiter'] = kwargs.get('maxiter', m)
+        if not info and not verbose:
+            residual_tr = hutch(PC, **kwargs)
+            return bulk_tr + residual_tr
+        else:
+            kwargs['info'] = True
+            kwargs['verbose'] = False
+            residual_tr, ID = hutch(PC, **kwargs)
+            info_dict.update(ID)
+
+    ## Modify the info dict
+    info_dict['estimate'] = bulk_tr + residual_tr
+    info_dict['estimator'] = 'Hutch++'
+    info_dict['n_matvecs'] = 2*b + info_dict.get('n_samples', m)
+    info_dict['n_samples'] = b + info_dict.get('n_samples', m)
+    info_dict['pdf'] = 'rademacher'
+
+    ## Print as needed
+    if verbose:
+        print(_estimator_msg(info_dict))
+    return info_dict['estimate'] if not info else (info_dict['estimate'], info_dict)
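For intuition, the new `hutchpp` above follows the usual Hutch++ recipe: sketch a basis Q from A applied to random test vectors, take the exact trace on that subspace, and estimate only the deflated residual stochastically. A dense NumPy reference version is sketched below for cross-checking on small matrices only; `hutchpp_dense` and its `m`/`seed` parameters are illustrative names, not part of the package.

import numpy as np

def hutchpp_dense(A: np.ndarray, m: int = 60, seed: int = 0) -> float:
    rng = np.random.default_rng(seed)
    n = A.shape[0]
    S = rng.choice([-1.0, 1.0], size=(n, m // 3))   # sketching vectors
    G = rng.choice([-1.0, 1.0], size=(n, m // 3))   # residual test vectors
    Q = np.linalg.qr(A @ S)[0]                      # orthonormal basis for the sketch
    G = G - Q @ (Q.T @ G)                           # deflate the test vectors against range(Q)
    bulk = np.trace(Q.T @ A @ Q)                    # exact trace on the captured subspace
    resid = np.trace(G.T @ A @ G) / G.shape[1]      # Girard-Hutchinson estimate on the complement
    return bulk + resid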
 
 def __xtrace(W: np.ndarray, Z: np.ndarray, Q: np.ndarray, R: np.ndarray, method: str):
     """Helper for xtrace function"""
@@ -355,8 +374,8 @@ def xtrace(
     fun: Union[str, Callable] = None,
     nv: Union[str, int] = "auto",
     pdf: str = "sphere",
-    atol: float = 0.1,
-    rtol: float = 1e-6,
+    atol: float = 0.0,
+    rtol: float = 0.0,
     cond_tol: float = 1e8,
     verbose: int = 0,
     info: bool = False,
@@ -367,7 +386,7 @@ def xtrace(
     nv = int(nv) if isinstance(nv, Integral) else int(np.ceil(np.sqrt(A.shape[0])))
     n = A.shape[0]
 
-    ## Transparently convert A to matrix function
+    ## If fun is specified, transparently convert A to matrix function
     if isinstance(fun, str):
         assert fun in _builtin_matrix_functions, "If given as a string, matrix_function be one of the builtin functions."
         A = matrix_function(A, fun=fun)
@@ -402,6 +421,9 @@ def xtrace(
         if verbose > 0:
             print(f"It: {it}, est: {t:.8f}, Y_size: {Y.shape}, error: {err:.8f}")
 
+        if err <= atol:
+            break
+
     if info:
         info = {"estimate": t, "samples": t_samples, "error": err }
         return t, info
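Note the behavioral change in `xtrace` above: the defaults are now `atol = rtol = 0.0`, so nothing stops early unless a positive tolerance is passed, in which case the new `if err <= atol: break` exits once the internal error estimate falls below it. A small usage sketch (assuming any symmetric `A`; only `atol` participates in the check shown above):

import numpy as np
from primate.trace import xtrace

A = np.diag(np.linspace(0.1, 1.0, 100))   # placeholder symmetric operator
est_full  = xtrace(A)                     # atol=0.0: runs the full sampling loop as before
est_early = xtrace(A, atol=0.01)          # may stop early once the error estimate drops below 0.01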
diff --git a/tests/test_slq.py b/tests/test_slq.py
index a684309..9e58fc7 100644
--- a/tests/test_slq.py
+++ b/tests/test_slq.py
@@ -15,7 +15,7 @@ def test_stochastic_quadrature():
     from primate.diagonalize import _lanczos
     assert hasattr(_lanczos, "stochastic_quadrature"), "Module compile failed"
-    from primate.trace import sl_gauss
+    from primate.quadrature import sl_gauss
     np.random.seed(1234)
     n, nv, lanczos_deg = 30, 250, 20
     A = csc_array(symmetric(30), dtype=np.float32)
diff --git a/tests/test_trace.py b/tests/test_trace.py
index ccceb26..8c67243 100644
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@@ -15,167 +15,236 @@
 
 ## NOTE: trace estimation only works with isotropic vectors
 def test_girard_fixed():
-    from sanity import girard_hutch
-    np.random.seed(1234)
-    n = 30
-    ew = 0.2 + 1.5*np.linspace(0, 5, n)
-    Q,R = np.linalg.qr(np.random.uniform(size=(n,n)))
-    A = Q @ np.diag(ew) @ Q.T
-    A = (A + A.T) / 2
-    ew_true = np.linalg.eigvalsh(A)
-    tr_est = girard_hutch(A, lambda x: x, nv = n, estimates=False)
-    threshold = 0.05*(np.max(ew)*n - np.min(ew)*n)
-    assert np.allclose(ew_true, ew)
-    assert np.isclose(A.trace() - tr_est, 0.0, atol=threshold)
+    from sanity import girard_hutch
+    np.random.seed(1234)
+    n = 30
+    ew = 0.2 + 1.5*np.linspace(0, 5, n)
+    Q,R = np.linalg.qr(np.random.uniform(size=(n,n)))
+    A = Q @ np.diag(ew) @ Q.T
+    A = (A + A.T) / 2
+    ew_true = np.linalg.eigvalsh(A)
+    tr_est = girard_hutch(A, lambda x: x, nv = n, estimates=False)
+    threshold = 0.05*(np.max(ew)*n - np.min(ew)*n)
+    assert np.allclose(ew_true, ew)
+    assert np.isclose(A.trace() - tr_est, 0.0, atol=threshold)
 
 def test_trace_import():
-    import primate.trace
-    assert '_trace' in dir(primate.trace)
-    from primate.trace import hutch, _trace
-    assert 'hutch' in dir(_trace)
-    assert isinstance(hutch, Callable)
+    import primate.trace
+    assert '_trace' in dir(primate.trace)
+    from primate.trace import hutch, _trace
+    assert 'hutch' in dir(_trace)
+    assert isinstance(hutch, Callable)
 
 def test_trace_basic():
-    from primate.trace import hutch
-    np.random.seed(1234)
-    n = 10
-    A = symmetric(n)
-    tr_test1 = hutch(A, maxiter=100, seed=5, num_threads=1)
-    tr_test2 = hutch(A, maxiter=100, seed=5, num_threads=1)
-    tr_true = A.trace()
-    assert tr_test1 == tr_test2, "Builds not reproducible!"
-    assert np.isclose(tr_test1, tr_true, atol=tr_true*0.05)
+    from primate.trace import hutch
+    np.random.seed(1234)
+    n = 10
+    A = symmetric(n)
+    tr_test1 = hutch(A, maxiter=100, seed=5, num_threads=1)
+    tr_test2 = hutch(A, maxiter=100, seed=5, num_threads=1)
+    tr_true = A.trace()
+    assert tr_test1 == tr_test2, "Builds not reproducible!"
+    assert np.isclose(tr_test1, tr_true, atol=tr_true*0.05)
 
 def test_trace_pdfs():
-    from primate.trace import hutch
-    np.random.seed(1234)
-    n = 50
-    A = symmetric(n)
-    tr_test1 = hutch(A, maxiter=200, seed=5, num_threads=1, pdf="rademacher")
-    tr_test2 = hutch(A, maxiter=200, seed=5, num_threads=1, pdf="normal")
-    tr_true = A.trace()
-    assert np.isclose(tr_test1, tr_test2, atol=tr_true*0.05)
+    from primate.trace import hutch
+    np.random.seed(1234)
+    n = 50
+    A = symmetric(n)
+    tr_test1 = hutch(A, maxiter=200, seed=5, num_threads=1, pdf="rademacher")
+    tr_test2 = hutch(A, maxiter=200, seed=5, num_threads=1, pdf="normal")
+    tr_true = A.trace()
+    assert np.isclose(tr_test1, tr_test2, atol=tr_true*0.05)
 
 def test_trace_inputs():
-    from primate.trace import hutch
-    n = 10
-    A = symmetric(n)
-    tr_1 = hutch(A, maxiter=100)
-    tr_2 = hutch(csc_array(A), maxiter=100)
-    tr_3 = hutch(aslinearoperator(A), maxiter=100)
-    assert all([isinstance(t, Number) for t in [tr_1, tr_2, tr_3]])
+    from primate.trace import hutch
+    n = 10
+    A = symmetric(n)
+    tr_1 = hutch(A, maxiter=100)
+    tr_2 = hutch(csc_array(A), maxiter=100)
+    tr_3 = hutch(aslinearoperator(A), maxiter=100)
+    assert all([isinstance(t, Number) for t in [tr_1, tr_2, tr_3]])
 
 def test_hutch_info():
-    from primate.trace import hutch
-    np.random.seed(1234)
-    n = 25
-    A = csc_array(symmetric(n), dtype=np.float32)
-    tr_est, info = hutch(A, maxiter = 200, info=True)
-    assert isinstance(info, dict) and isinstance(tr_est, Number)
-    assert len(info['samples']) == 200
-    assert np.all(~np.isclose(info['samples'], 0.0))
-    assert np.isclose(tr_est, A.trace(), atol=1.0)
+    from primate.trace import hutch
+    np.random.seed(1234)
+    n = 25
+    A = csc_array(symmetric(n), dtype=np.float32)
+    tr_est, info = hutch(A, maxiter = 200, info=True)
+    assert isinstance(info, dict) and isinstance(tr_est, Number)
+    assert len(info['samples']) == 200
+    assert np.all(~np.isclose(info['samples'], 0.0))
+    assert np.isclose(tr_est, A.trace(), atol=1.0)
 
 def test_hutch_multithread():
-    from primate.trace import hutch
-    np.random.seed(1234)
-    n = 25
-    A = csc_array(symmetric(n), dtype=np.float32)
-    tr_est, info = hutch(A, maxiter = 200, atol=0.0, info = True, num_threads=6)
-    assert len(info['samples'] == 200)
-    assert np.all(~np.isclose(info['samples'], 0.0))
-    assert np.isclose(tr_est, A.trace(), atol=1.0)
+    from primate.trace import hutch
+    np.random.seed(1234)
+    n = 25
+    A = csc_array(symmetric(n), dtype=np.float32)
+    tr_est, info = hutch(A, maxiter = 200, atol=0.0, info = True, num_threads=6)
+    assert len(info['samples'] == 200)
+    assert np.all(~np.isclose(info['samples'], 0.0))
+    assert np.isclose(tr_est, A.trace(), atol=1.0)
 
 def test_hutch_clt_atol():
-    from primate.trace import hutch
-    np.random.seed(1234)
-    n = 30
-    A = csc_array(symmetric(n), dtype=np.float32)
-
-    from primate.stats import sample_mean_cinterval
-    tr_est, info = hutch(A, maxiter = 100, num_threads=1, seed=5, info=True)
-    tr_samples = info['samples']
-    ci = np.array([sample_mean_cinterval(tr_samples[:i], sdist='normal') if i > 1 else [-np.inf, np.inf] for i in range(len(tr_samples))])
-
-    ## Detect when, for the fixed set of samples, the trace estimator should converge by CLT
-    atol_threshold = (A.trace() * 0.05)
-    clt_converged = np.ravel(0.5*np.diff(ci, axis=1)) <= atol_threshold
-    assert np.any(clt_converged), "Did not converge!"
-    converged_ind = np.flatnonzero(clt_converged)[0]
-
-    ## Re-run with same seed and ensure the index matches
-    tr_est, info = hutch(A, maxiter = 100, num_threads=1, atol=atol_threshold, seed=5, info=True)
-    tr_samples = info['samples']
-    converged_online = np.take(np.flatnonzero(tr_samples == 0.0), 0)
-    assert converged_online == converged_ind, "hutch not converging at correct index!"
+    from primate.trace import hutch
+    np.random.seed(1234)
+    n = 30
+    A = csc_array(symmetric(n), dtype=np.float32)
+
+    from primate.stats import sample_mean_cinterval
+    tr_est, info = hutch(A, maxiter = 100, num_threads=1, seed=5, info=True)
+    tr_samples = info['samples']
+    ci = np.array([sample_mean_cinterval(tr_samples[:i], sdist='normal') if i > 1 else [-np.inf, np.inf] for i in range(len(tr_samples))])
+
+    ## Detect when, for the fixed set of samples, the trace estimator should converge by CLT
+    atol_threshold = (A.trace() * 0.05)
+    clt_converged = np.ravel(0.5*np.diff(ci, axis=1)) <= atol_threshold
+    assert np.any(clt_converged), "Did not converge!"
+    converged_ind = np.flatnonzero(clt_converged)[0]
+
+    ## Re-run with same seed and ensure the index matches
+    tr_est, info = hutch(A, maxiter = 100, num_threads=1, atol=atol_threshold, seed=5, info=True)
+    tr_samples = info['samples']
+    converged_online = np.take(np.flatnonzero(tr_samples == 0.0), 0)
+    assert converged_online == converged_ind, "hutch not converging at correct index!"
 
 def test_hutch_change():
-    from primate.trace import hutch
-    np.random.seed(1234)
-    n = 30
-    A = csc_array(symmetric(n), dtype=np.float32)
-    tr_est, info = hutch(A, maxiter = 100, num_threads=1, seed=5, info=True)
-    tr_samples = info['samples']
-    estimator = np.cumsum(tr_samples) / np.arange(1, 101)
-    conv_ind_true = np.flatnonzero(np.abs(np.diff(estimator)) <= 0.001)[0] + 1
-
-    ## Test the convergence checking for the atol change method
-    tr_est, info = hutch(A, maxiter = 100, num_threads=1, seed=5, info=True, atol=0.001, stop="change")
-    conv_ind_test = np.take(np.flatnonzero(info['samples'] == 0), 0)
-    assert abs(conv_ind_true - conv_ind_test) <= 1
+    from primate.trace import hutch
+    np.random.seed(1234)
+    n = 30
+    A = csc_array(symmetric(n), dtype=np.float32)
+    tr_est, info = hutch(A, maxiter = 100, num_threads=1, seed=5, info=True)
+    tr_samples = info['samples']
+    estimator = np.cumsum(tr_samples) / np.arange(1, 101)
+    conv_ind_true = np.flatnonzero(np.abs(np.diff(estimator)) <= 0.001)[0] + 1
+
+    ## Test the convergence checking for the atol change method
+    tr_est, info = hutch(A, maxiter = 100, num_threads=1, seed=5, info=True, atol=0.001, stop="change")
+    conv_ind_test = np.take(np.flatnonzero(info['samples'] == 0), 0)
+    assert abs(conv_ind_true - conv_ind_test) <= 1
 
 def test_trace_mf():
-    from primate.trace import hutch
-    n = 10
-    A = symmetric(n)
-    tr_est = hutch(A, fun="identity", maxiter=100, num_threads=1, seed = 5)
-    tr_true = A.trace()
-    assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
-    tr_est = hutch(A, fun=lambda x: x, maxiter=100, num_threads=1, seed = 5)
-    assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
+    from primate.trace import hutch
+    n = 10
+    A = symmetric(n)
+    tr_est = hutch(A, fun="identity", maxiter=100, num_threads=1, seed = 5)
+    tr_true = A.trace()
+    assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
+    tr_est = hutch(A, fun=lambda x: x, maxiter=100, num_threads=1, seed = 5)
+    assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
 
 def test_trace_fftr():
-    from primate.trace import hutch
-    n = 50
-    A = symmetric(n)
-
-    ## Test the fttr against the golub_welsch
-    tr_est1, info1 = hutch(A, fun="identity", maxiter=100, seed=5, num_threads=1, info=True, quad="golub_welsch")
-    tr_est2, info2 = hutch(A, fun="identity", maxiter=100, seed=5, num_threads=1, info=True, quad="fttr")
-    assert np.isclose(tr_est1, tr_est2, atol=tr_est1*0.01)
-
-    ## Test accuracy
-    assert np.isclose(A.trace(), tr_est1, atol=tr_est1*0.025)
-
-    # from primate.diagonalize import lanczos, _lanczos
-    # v0 = np.array([-1, 1, 1,-1, 1,-1, 1,-1,-1, 1]) / np.sqrt(10)
-    # a, b = lanczos(A, v0=v0, deg=10)
-    # a, b = a, np.append([0], b)
-    # _lanczos.quadrature(a, b, 10, 0)
-
-
-    # from primate.operator import matrix_function
-    # M = matrix_function(A, fun="identity")
-    # M.method = "fttr"
-    # M.quad(np.random.choice([-1.0, +1.0], size=n))
-
-    ## TODO
-    # tr_est, info = hutch(A, fun=lambda x: x, maxiter=100, seed=5, num_threads=1, info=True)
-    # assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
-    # for s in range(15000):
-    #   est, info = hutch(A, fun="identity", deg=2, maxiter=200, num_threads=1, seed=591, info=True)
-    #   assert not np.isnan(est)
-    # for s in range(15000):
-    #   est, info = hutch(A, fun="identity", deg=20, maxiter=200, num_threads=8, seed=-1, info=True)
-    #   assert not np.isnan(est)
-
-    # from primate.operator import matrix_function
-    # M = matrix_function(A, fun="identity", deg=20)
-    # for s in range(15000):
-    #   v0 = np.random.choice([-1, 1], size=M.shape[0])
-    #   assert not np.isnan(M.quad(v0))
-
-
-    # from primate.diagonalize import lanczos
-    # lanczos(A, v0=v0, rtol=M.rtol, deg=M.deg, orth=M.orth)
-    # if np.any(np.isnan(info['samples']))
\ No newline at end of file
+    from primate.trace import hutch
+    n = 50
+    A = symmetric(n)
+
+    ## Test the fttr against the golub_welsch
+    tr_est1, info1 = hutch(A, fun="identity", maxiter=100, seed=5, num_threads=1, info=True, quad="golub_welsch")
+    tr_est2, info2 = hutch(A, fun="identity", maxiter=100, seed=5, num_threads=1, info=True, quad="fttr")
+    assert np.isclose(tr_est1, tr_est2, atol=tr_est1*0.01)
+
+    ## Test accuracy
+    assert np.isclose(A.trace(), tr_est1, atol=tr_est1*0.025)
+
+    # from primate.diagonalize import lanczos, _lanczos
+    # v0 = np.array([-1, 1, 1,-1, 1,-1, 1,-1,-1, 1]) / np.sqrt(10)
+    # a, b = lanczos(A, v0=v0, deg=10)
+    # a, b = a, np.append([0], b)
+    # _lanczos.quadrature(a, b, 10, 0)
+
+
+    # from primate.operator import matrix_function
+    # M = matrix_function(A, fun="identity")
+    # M.method = "fttr"
+    # M.quad(np.random.choice([-1.0, +1.0], size=n))
+
+    ## TODO
+    # tr_est, info = hutch(A, fun=lambda x: x, maxiter=100, seed=5, num_threads=1, info=True)
+    # assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
+    # for s in range(15000):
+    #   est, info = hutch(A, fun="identity", deg=2, maxiter=200, num_threads=1, seed=591, info=True)
+    #   assert not np.isnan(est)
+    # for s in range(15000):
+    #   est, info = hutch(A, fun="identity", deg=20, maxiter=200, num_threads=8, seed=-1, info=True)
+    #   assert not np.isnan(est)
+
+    # from primate.operator import matrix_function
+    # M = matrix_function(A, fun="identity", deg=20)
+    # for s in range(15000):
+    #   v0 = np.random.choice([-1, 1], size=M.shape[0])
+    #   assert not np.isnan(M.quad(v0))
+
+
+    # from primate.diagonalize import lanczos
+    # lanczos(A, v0=v0, rtol=M.rtol, deg=M.deg, orth=M.orth)
+    # if np.any(np.isnan(info['samples']))
+
+def test_quad_sum():
+    from primate.trace import _trace
+    np.random.seed(1234)
+    n = 100
+    A = symmetric(n)
+    Q = np.linalg.qr(A)[0]
+    test_quads = _trace.quad_sum(A, Q)
+    true_quads = (Q.T @ (A @ Q)).diagonal()
+    assert np.allclose(test_quads, true_quads)
+    assert np.isclose(np.sum(test_quads), np.sum(true_quads))
+    test_quads = _trace.quad_sum(A, Q[:,:10])
+    true_quads = (Q[:,:10].T @ (A @ Q[:,:10])).diagonal()
+    assert np.allclose(test_quads, true_quads)
+    assert np.isclose(np.sum(test_quads), np.sum(true_quads))
+
+def test_hutch_pp():
+    from primate.trace import hutchpp
+    np.random.seed(1234)
+    n = 100
+    A = symmetric(n)
+    test_trace = hutchpp(A, mode="reduced", seed=1)
+    true_trace = A.trace()
+    assert np.isclose(test_trace, true_trace, atol=1.0)
+    test_trace = hutchpp(A, mode="full", seed=1)
+    assert np.isclose(test_trace, true_trace, atol=1.0)
+
+    # import timeit
+    # timeit.timeit(lambda: hutchpp(A, mode="full"), number=1000)
+    # timeit.timeit(lambda: hutchpp(A, mode="reduced"), number=1000)
+
+    # ## Raw isotropic random vectors
+    # m = 20 * 3
+    # N: int = A.shape[0]
+    # M: int = 2 * m // 3
+    # f_dtype = (A @ np.zeros(A.shape[1])).dtype if not hasattr(A, "dtype") else A.dtype
+    # W = np.random.choice([-1.0, +1.0], size=(N, M)).astype(f_dtype)
+    # W1, W2 = W[:,:(m // 3)], W[:,(m // 3):]
+    # Q = np.linalg.qr(A @ W2)[0]
+
+    # ## Start with estimate using the largest eigen-spaces
+    # bulk_tr = (Q.T @ (A @ Q)).trace()
+
+    # ## Estimate residual via Girard
+    # # from scipy.sparse.linalg import aslinearoperator, LinearOperator
+    # # B = np.eye(A.shape[0]) - Q @ Q.T
+    # # deflate_proj = lambda w: B @ (A @ (B @ w))
+    # # L = LinearOperator(matvec = deflate_proj, shape=A.shape)
+    # from primate.operator import OrthComplement
+    # L = OrthComplement(A, Q)
+
+    # from primate.trace import hutch
+    # true_residual = A.trace() - bulk_tr
+    # np.abs((bulk_tr + hutch(L, atol = 0.10, maxiter=1000, verbose=True)) - A.trace())
+    # hutch(L)
+    # hutch(L, atol = 0.001, maxiter=200, deg=40, verbose=True, plot=True)
+    # 0.9534050697745574 - 0.003 <= true_residual
+    # true_residual <= 0.9534050697745574 + 0.003
+
+    # residual_tr = 0.0
+    # if True:
+    #   G = W1 - Q @ Q.T @ W1
+    #   residual_tr = (1 / (m // 3)) * (G.T @ (A @ G)).trace()
+
+    # print(f"A trace: {A.trace():8f}")
+    # for i in range(1, A.shape[0]):
+    #   np.random.seed(1234)
+    #   est = hutch_pp(A, m = 3 * i)
+    #   print(f"{i}: {est} (error: {np.abs(est - A.trace())})")
\ No newline at end of file