diff --git a/.gitignore b/.gitignore index bdd5bb4..7eb4c23 100644 --- a/.gitignore +++ b/.gitignore @@ -83,7 +83,7 @@ celerybeat-schedule .env # virtualenv -.venv +.venv* venv/ ENV/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 1417c4c..a4be735 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,104 +2,146 @@ All notable changes to **pentapy** will be documented in this file. +## [2.0.0] - 2024-06 + +See [#27](https://github.com/GeoStat-Framework/pentapy/pull/27) + +### Breaking Changes + +- fully parallelized the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support that can now be enabled via the new `num_threads` parameter in `pentapy.solve` (default: 1) +- updated the **Cython low level interfaces** to PTRANS-I and PTRANS-II to **only accept C-contiguous arrays** (not backwards compatible) + +### Enhancements + +- improved error handling and added debug information to error messages + +### Changes + +- refactored and documented the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support +- fully typed the `pentapy.tools` module and the function ``pentapy.solve` +- made internal solver alias handling of `pentapy.solve` smarter, more robust, and removed all duplicate code +- gave all solvers a consistent interface +- made code in ``pentapy.core` `more maintainable +- fixed typos in documentation + +### Bugfixes + +- fixed error handling in case of zero-division to trigger dead error handling branch (see [Issue 23](https://github.com/GeoStat-Framework/pentapy/issues/23)) +- fixed edge case error for row/column count of 3 (see [Issue 24](https://github.com/GeoStat-Framework/pentapy/issues/24)) + +### Tests + +- transitioned from `unittest`-based testing to fully `pytest`-based testing with parametrized and parallelized exhaustive testing (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25)) +- made actual tests more meaningful by comparing them to LAPACK as reference standard (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25)) +- included external solver bindings accessible via `pentapy.solve` as part of the test suite +- increased true coverage (not line-hit coverage) close to 100% ## [1.3.0] - 2024-04 See [#21](https://github.com/GeoStat-Framework/pentapy/pull/21) ### Enhancements + - added support for python 3.12 - added support for numpy 2 - build extensions with numpy 2 and cython 3 ### Changes + - dropped python 3.7 support - dropped 32bit builds - linted cython files - increase maximal line length to 88 (black default) - ## [1.2.0] - 2023-04 See [#19](https://github.com/GeoStat-Framework/pentapy/pull/19) ### Enhancements + - added support for python 3.10 and 3.11 - add wheels for arm64 systems - created `solver.pxd` file to be able to cimport the solver module - added a `CITATION.bib` file ### Changes + - move to `src/` based package structure - dropped python 3.6 support - move meta-data to pyproject.toml - simplified documentation ### Bugfixes + - determine correct version when installing from archive ## [1.1.2] - 2021-07 ### Changes + - new package structure with `pyproject.toml` ([#15](https://github.com/GeoStat-Framework/pentapy/pull/15)) - Sphinx-Gallery for Examples - Repository restructuring: use a single `main` branch - use `np.asarray` in `solve` to speed up computation ([#17](https://github.com/GeoStat-Framework/pentapy/pull/17)) - ## [1.1.1] - 2021-02 ### Enhancements + - Python 3.9 support ### Changes -- GitHub Actions for CI +- GitHub Actions for CI ## [1.1.0] - 2020-03-22 ### Enhancements + - Python 3.8 support ### Changes + - python only builds are no longer available - Python 2.7 and 3.4 support dropped - ## [1.0.3] - 2019-11-10 ### Enhancements + - the algorithms `PTRANS-I` and `PTRANS-II` now raise a warning when they can not solve the given system - there are now switches to install scipy and umf solvers as extra requirements ### Bugfixes -- multiple minor bugfixes +- multiple minor bugfixes ## [1.0.0] - 2019-09-18 ### Enhancements -- the second algorithm `PTRANS-II` from *Askar et al. 2015* is now implemented and can be used by `solver=2` + +- the second algorithm `PTRANS-II` from _Askar et al. 2015_ is now implemented and can be used by `solver=2` - the package is now tested and a coverage is calculated - there are now pre-built binaries for Python 3.7 - the documentation is now available under https://geostat-framework.readthedocs.io/projects/pentapy ### Changes -- pentapy is now licensed under the MIT license +- pentapy is now licensed under the MIT license ## [0.1.1] - 2019-03-08 ### Bugfixes -- MANIFEST.in was missing in the 0.1.0 version +- MANIFEST.in was missing in the 0.1.0 version ## [0.1.0] - 2019-03-07 This is the first release of pentapy, a python toolbox for solving pentadiagonal linear equation systems. The solver is implemented in cython, which makes it really fast. - +[2.0.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.4.0...v2.0.0 +[1.4.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.3.0...v1.4.0 [1.3.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.2.0...v1.3.0 [1.2.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.1.2...v1.2.0 [1.1.2]: https://github.com/GeoStat-Framework/pentapy/compare/v1.1.1...v1.1.2 diff --git a/README.md b/README.md index 6e5983a..068ab5b 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ Have a look at the script: [``examples/03_perform_simple.py``](https://github.co ## Requirements: -- [NumPy >= 1.14.5](https://www.numpy.org) +- [NumPy >= 1.20.0](https://www.numpy.org) ### Optional diff --git a/docs/source/index.rst b/docs/source/index.rst index 8433404..93d9a5d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -94,7 +94,7 @@ The performance plot was created with ``perfplot`` (`link = 1.14.5 `_ +- `Numpy >= 1.20.0 `_ Optional -------- diff --git a/examples/README.rst b/examples/README.rst index ea7bac4..a1b7ae0 100644 --- a/examples/README.rst +++ b/examples/README.rst @@ -88,7 +88,7 @@ If M is a full matrix, you call the following: X = pp.solve(M, Y) -If M is flattend in row-wise order you have to set the keyword argument ``is_flat=True``: +If M is flattened in row-wise order you have to set the keyword argument ``is_flat=True``: .. code-block:: python @@ -99,7 +99,7 @@ If M is flattend in row-wise order you have to set the keyword argument ``is_fla X = pp.solve(M, Y, is_flat=True) -If you got a col-wise flattend matrix you have to set ``index_row_wise=False``: +If you got a col-wise flattened matrix you have to set ``index_row_wise=False``: .. code-block:: python diff --git a/paper/paper.md b/paper/paper.md index 53c6f04..fa0fdc3 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -51,8 +51,8 @@ $$ Here, $d_i$ are the diagonal entries and $d_i^{(j)}$ represent the $j$-th minor diagonal. -Recently, @askar presented two algorithms to -solve the linear systems of equations for $X$, ``PTRANS-I`` and ``PTRANS-II``, +Recently, @askar presented two algorithms to +solve the linear systems of equations for $X$, ``PTRANS-I`` and ``PTRANS-II``, applying first transformation to a triangular matrix and then, respectively, backward and forward substitution. ``pentapy`` provides Cython [@cython] implementations of these algorithms and a set of tools to convert matrices to row-wise or @@ -73,7 +73,7 @@ The linear algebra solver of NumPy [@numpy] served as a standard reference, whic ``pentapy`` is designed to provide a fast solver for the special case of a pentadiagonal linear system. To the best of the author's knowledge, this package outperforms the current algorithms for solving pentadiagonal systems in Python. -The solver can handle different input formats of the coefficient matrix, i.e., a flattend matrix or a +The solver can handle different input formats of the coefficient matrix, i.e., a flattened matrix or a quadratic matrix. diff --git a/pyproject.toml b/pyproject.toml index 7b0aec6..57bed29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ requires = [ "setuptools_scm>=7", "numpy>=2.0.0rc1,<2.3; python_version >= '3.9'", "oldest-supported-numpy; python_version < '3.9'", + "extension-helpers>=1", "Cython>=3.0.10,<3.1.0", ] build-backend = "setuptools.build_meta" @@ -35,15 +36,14 @@ classifiers = [ "Topic :: Scientific/Engineering", "Topic :: Utilities", ] -dependencies = ["numpy>=1.20.0"] +dependencies = [ + "numpy>=1.20.0", +] [project.optional-dependencies] scipy = ["scipy"] umfpack = ["scikit-umfpack"] -all = [ - "scipy", - "scikit-umfpack", -] +all = ["scipy", "scikit-umfpack"] doc = [ "m2r2>=0.2.8", "scipy>=1.1.0", @@ -54,12 +54,17 @@ doc = [ "sphinx-gallery>=0.8", "sphinx-rtd-theme>=2", ] -test = ["pytest-cov>=3"] +test = [ + "pytest>=8", + "pytest-cov>=3", + "pytest-xdist>=3", + "scipy>=1.1.0", +] check = [ - "black>=24,<25", - "isort[colors]", - "pylint", - "cython-lint", + "black>=24,<25", + "isort[colors]", + "pylint", + "cython-lint", ] [project.urls] @@ -103,6 +108,8 @@ max-line-length = 120 "*examples*", "*tests*", "*paper*", + "src/pentapy/_version.py", + "src/pentapy/__init__.py", ] [tool.coverage.report] @@ -136,6 +143,9 @@ max-line-length = 120 max-attributes = 25 max-public-methods = 75 +[tool.pytest.ini_options] +addopts = "--doctest-modules" + [tool.cibuildwheel] # Switch to using build build-frontend = "build" diff --git a/setup.py b/setup.py index 8081e05..0c23078 100644 --- a/setup.py +++ b/setup.py @@ -1,23 +1,58 @@ -"""pentapy: A toolbox for pentadiagonal matrizes.""" +"""pentapy: A toolbox for pentadiagonal matrices.""" + +# === Imports === import os import numpy as np from Cython.Build import cythonize +from extension_helpers import add_openmp_flags_if_available from setuptools import Extension, setup +# === Constants === + +# the environment variable key for the build of the serial/parallel version +PENTAPY_BUILD_PARALLEL = "PENTAPY_BUILD_PARALLEL" +# the compiler flags for the OpenMP parallelization +OPENMP = "OPENMP" +# the number of threads for the Cython build +CYTHON_BUILD_NUM_THREADS = 1 + +# === Setup === + # cython extensions CY_MODULES = [ Extension( - name=f"pentapy.solver", + name="pentapy.solver", sources=[os.path.join("src", "pentapy", "solver.pyx")], include_dirs=[np.get_include()], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], ) ] +# the OpenMP link is added if available/requested +# the environment variables can be PENTAPY_BUILD_PARALLEL = 0 (builds serial version) or +# PENTAPY_BUILD_PARALLEL != 0 (builds parallel version) +with_open_mp = False +if int(os.environ.get(PENTAPY_BUILD_PARALLEL, "0")): + openmp_added = [add_openmp_flags_if_available(mod) for mod in CY_MODULES] + with_open_mp = any(openmp_added) + if with_open_mp: + open_mp_str = "linking OpenMP (parallel version)" + else: + open_mp_str = "not linking OpenMP (serial version)" + + print(f"PENTAPY SETUP - {open_mp_str}") + +else: + print("PENTAPY SETUP - OpenMP not requested (serial version)") + setup( - ext_modules=cythonize(CY_MODULES), + ext_modules=cythonize( + CY_MODULES, + nthreads=CYTHON_BUILD_NUM_THREADS, + compile_time_env={OPENMP: with_open_mp}, + ), package_data={"pentapy": ["*.pxd"]}, # include pxd files include_package_data=False, # ignore other files zip_safe=False, diff --git a/src/pentapy/.gitignore b/src/pentapy/.gitignore new file mode 100644 index 0000000..53cc0d6 --- /dev/null +++ b/src/pentapy/.gitignore @@ -0,0 +1,2 @@ +# Cython html files +*.html \ No newline at end of file diff --git a/src/pentapy/__init__.py b/src/pentapy/__init__.py index 655c428..3705064 100644 --- a/src/pentapy/__init__.py +++ b/src/pentapy/__init__.py @@ -44,7 +44,7 @@ try: from pentapy._version import __version__ -except ImportError: # pragma: nocover +except ImportError: # pragma: no cover # package is not installed __version__ = "0.0.0.dev0" diff --git a/src/pentapy/_models.py b/src/pentapy/_models.py new file mode 100644 index 0000000..16119cd --- /dev/null +++ b/src/pentapy/_models.py @@ -0,0 +1,66 @@ +""" +Auxiliary models for the pentapy package. + +""" + +# === Imports === + +from enum import IntEnum +from typing import Dict + +# === Models === + + +class Infos(IntEnum): + """ + Defines the possible returns for ``info`` of the low level pentadiagonal solvers, + namely + + - ``SUCCESS``: the solver has successfully solved the system + - ``SHAPE_MISMATCH``: the shape of the input arrays is incorrect + - ``WRONG_SOLVER``: the solver alias is the solver alias is incorrect on C-level + (internal error, should not occur) + + """ + + SUCCESS = 0 + SHAPE_MISMATCH = -1 + WRONG_SOLVER = -2 + + +class PentaSolverAliases(IntEnum): + """ + Defines all available solver aliases for pentadiagonal systems, namely + + - ``PTRANS_I``: the PTRANS-I algorithm + - ``PTRANS_II``: the PTRANS-II algorithm + - ``LAPACK``: Scipy's LAPACK solver :func:`scipy.linalg.solve_banded` + - ``SUPER_LU``: Scipy's SuperLU solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` + - ``UMFPACK``: Scipy's UMFpack solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=True)` + + """ # noqa: E501 + + PTRANS_I = 1 + PTRANS_II = 2 + LAPACK = 3 + SUPER_LU = 4 + UMFPACK = 5 + + +# === Constants === + +_SOLVER_ALIAS_CONVERSIONS: Dict[str, PentaSolverAliases] = { + "1": PentaSolverAliases.PTRANS_I, + "ptrans-i": PentaSolverAliases.PTRANS_I, + "2": PentaSolverAliases.PTRANS_II, + "ptrans-ii": PentaSolverAliases.PTRANS_II, + "3": PentaSolverAliases.LAPACK, + "lapack": PentaSolverAliases.LAPACK, + "solve_banded": PentaSolverAliases.LAPACK, + "4": PentaSolverAliases.SUPER_LU, + "spsolve": PentaSolverAliases.SUPER_LU, + "5": PentaSolverAliases.UMFPACK, + "spsolve_umf": PentaSolverAliases.UMFPACK, + "umf": PentaSolverAliases.UMFPACK, + "umf_pack": PentaSolverAliases.UMFPACK, +} diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 067189d..fabb1cc 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -1,20 +1,219 @@ """The core module of pentapy.""" # pylint: disable=C0103, C0415, R0911, E0611 + +# === Imports === + import warnings +from typing import Literal, Optional import numpy as np -from pentapy.solver import penta_solver1, penta_solver2 -from pentapy.tools import _check_penta, create_banded, shift_banded +from pentapy import _models as pmodels +from pentapy import errors as perrors +from pentapy import solver as psolver # type: ignore +from pentapy import tools as ptools + +# === Types === + +SolverAliases = Literal[ + 1, + "1", + "PTRANS-I", + "ptrans-i", + 2, + "2", + "PTRANS-II", + "ptrans-ii", + 3, + "3", + "lapack", + 4, + "4", + "spsolve", + 5, + "5", + "spsolve_umf", + "umf", + "umf_pack", +] + +# === Auxiliary functions === + + +def _raise_ptrans_or_numpy_shape_mismatch_error( + mat_n_cols: int, + rhs_n_rows: int, +) -> None: + """ + Raises a shape mismatch error for the PTRANS solver or the NumPy dense solver. + + """ + + raise ValueError( + perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format( + lhs_n_cols=mat_n_cols, + rhs_n_rows=rhs_n_rows, + ) + ) + + +def _handle_ptrans_info_complete_fail_cases( + info: int, + mat_n_cols: int, + rhs_n_rows: int, +) -> None: + """ + Handles the cases where the PTRANS solver by raising the appropriate error. + + """ + + # Case 1: shape mismatch + if info == pmodels.Infos.SHAPE_MISMATCH: + _raise_ptrans_or_numpy_shape_mismatch_error( + mat_n_cols=mat_n_cols, + rhs_n_rows=rhs_n_rows, + ) + + # Case 2: wrong solver + elif info == pmodels.Infos.WRONG_SOLVER: # pragma: no cover + raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER) + + # Case 3: unknown error + + raise AssertionError( # pragma: no cover + perrors.PentaPyErrorMessages.UNKNOWN_ERROR, + ) + +# === Auxiliary Solver Interfaces === + + +def _solve_with_numpy( + mat_flat: np.ndarray, + rhs: np.ndarray, +) -> np.ndarray: + """ + Solver for a pentadiagonal system using NumPy's dense LAPACK solver. + + """ + + # in case of a shape mismatch, an error will be raised + if not mat_flat.shape[1] == rhs.shape[0]: + _raise_ptrans_or_numpy_shape_mismatch_error( + mat_n_cols=mat_flat.shape[1], + rhs_n_rows=rhs.shape[0], + ) + + # then, the system is solved using NumPy's dense solver + try: + return np.linalg.solve( + a=ptools.create_full(mat_flat, col_wise=False), + b=rhs, + ) + + # in case of a singular matrix, a warning will be issued and NaNs will be returned + except np.linalg.LinAlgError: + warnings.warn("pentapy: NumPy LAPACK dense solver encountered singular matrix.") + return np.full(shape=rhs.shape, fill_value=np.nan) + + +def _solve_with_ptrans( # pylint: disable=R1710 + mat: np.ndarray, + rhs: np.ndarray, + is_flat: bool, + index_row_wise: bool, + num_threads: Optional[int], + solver_inter: pmodels.PentaSolverAliases, +) -> np.ndarray: # type: ignore + """ + Solver for a pentadiagonal system using one of the PTRANS algorithms. -def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): + """ + + # the matrix is checked and shifted if necessary ... + if is_flat and index_row_wise: + mat_flat = np.asarray(mat, dtype=np.double) + ptools._check_penta(mat_flat) # pylint: disable=W0212 + elif is_flat: + mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy + ptools._check_penta(mat_flat) # pylint: disable=W0212 + ptools.shift_banded(mat_flat, copy=False) + else: + mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double) + + # ... followed by the conversion of the right-hand side + rhs = np.asarray(rhs, dtype=np.double) + + # Special case: Early exit when the matrix has only 3 rows/columns + # NOTE: this avoids memory leakage in the Cython-solver that will iterate over + # at least 4 rows/columns no matter what + if mat_flat.shape[1] == 3: + return _solve_with_numpy(mat_flat=mat_flat, rhs=rhs) + + # if there is only a single right-hand side, it has to be reshaped to a 2D array + # NOTE: this has to be reverted at the end + single_rhs = rhs.ndim == 1 + rhs_og_shape = rhs.shape + if single_rhs: + rhs = rhs[:, np.newaxis] + + # the respective solver is chosen ... + solver_func = ( + psolver.penta_solver1 # pylint: disable=I1101 + if solver_inter == pmodels.PentaSolverAliases.PTRANS_I + else psolver.penta_solver2 # pylint: disable=I1101 + ) + + # ... and the solver is called + sol, info = solver_func( + np.ascontiguousarray(mat_flat), + np.ascontiguousarray(rhs), + num_threads, + ) + + # in case of success, the solution can be returned (reshaped if necessary) + if info == pmodels.Infos.SUCCESS: + if single_rhs: + sol = sol.ravel() + + return sol + + # in case of a singular matrix, a warning will be issued and NaNs will be returned + if info > pmodels.Infos.SUCCESS: + warnings.warn( + perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format( + solver_inter_name=pmodels.PentaSolverAliases.PTRANS_I.name, + row_idx=info - 1, + ) + ) + + return np.full(shape=rhs_og_shape, fill_value=np.nan) + + # in case of an error, the respective error will be raised + _handle_ptrans_info_complete_fail_cases( + info=info, + mat_n_cols=mat_flat.shape[1], + rhs_n_rows=rhs_og_shape[0], + ) + + +# === Main Solver Interface === + + +def solve( + mat: np.ndarray, + rhs: np.ndarray, + is_flat: bool = False, + index_row_wise: bool = True, + solver: SolverAliases = 1, + num_threads: Optional[int] = None, +) -> np.ndarray: """ Solver for a pentadiagonal system. - The matrix can be given as a full n x n matrix or as a flattend one. - The flattend matrix can be given in a row-wise flattend form:: + The matrix can be given as a full (n x n) matrix or as a flattened one. + The flattened matrix can be given in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] [Dup1[0] Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] 0 ] @@ -22,7 +221,7 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): [0 Dlow1[1] Dlow1[2] ... Dlow1[N-2] Dlow1[N-1] Dlow1[N]] [0 0 Dlow2[2] ... Dlow2[N-2] Dlow2[N-2] Dlow2[N]]] - Or a column-wise flattend form:: + Or a column-wise flattened form:: [[0 0 Dup2[2] ... Dup2[N-2] Dup2[N-1] Dup2[N] ] [0 Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] Dup1[N] ] @@ -32,129 +231,148 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): Dup1 and Dup2 are the first and second upper minor-diagonals and Dlow1 resp. Dlow2 are the lower ones. - If you provide a column-wise flattend matrix, you have to set:: + If you provide a column-wise flattened matrix, you have to set:: index_row_wise=False Parameters ---------- - mat : :class:`numpy.ndarray` - The Matrix or the flattened Version of the pentadiagonal matrix. - rhs : :class:`numpy.ndarray` - The right hand side of the equation system. + mat : :class:`numpy.ndarray` of shape (m, m) or (5, m) + The full or flattened version of the pentadiagonal matrix. + rhs : :class:`numpy.ndarray` of shape (m,) or (m, n) + The right hand side(s) of the equation system. Its shape determines the shape + of the output as they will be identical. is_flat : :class:`bool`, optional - State if the matrix is already flattend. Default: ``False`` + State if the matrix is already flattened. Default: ``False`` index_row_wise : :class:`bool`, optional - State if the flattend matrix is row-wise flattend. Default: ``True`` + State if the flattened matrix is row-wise flattened. Default: ``True`` solver : :class:`int` or :class:`str`, optional Which solver should be used. The following are provided: - * ``[1, "1", "PTRANS-I"]`` : The PTRANS-I algorithm + * ``[1, "1", "PTRANS-I"]`` : The PTRANS-I algorithm (default) * ``[2, "2", "PTRANS-II"]`` : The PTRANS-II algorithm - * ``[3, "3", "lapack", "solve_banded"]`` : - scipy.linalg.solve_banded - * ``[4, "4", "spsolve"]`` : - The scipy sparse solver without umf_pack - * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : - The scipy sparse solver with umf_pack + * ``[3, "3", "lapack", "solve_banded"]`` : :func:`scipy.linalg.solve_banded` + * ``[4, "4", "spsolve"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` + * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` - Default: ``1`` + Strings are not case-sensitive. + num_threads : :class:`int` or ``None``, optional + Number of threads used in the PTRANS-I and PTRANS-II solvers for parallel + processing of multiple right-hand sides. Parallelisation overhead can be + significant for small systems. If set to a negative value or ``None``, the + number of threads is automatically determined. Default: ``None`` Returns ------- - result : :class:`numpy.ndarray` - Solution of the equation system - """ - if solver in [1, "1", "PTRANS-I"]: - if is_flat and index_row_wise: - mat_flat = np.asarray(mat, dtype=np.double) - _check_penta(mat_flat) - elif is_flat: - mat_flat = np.array(mat, dtype=np.double) - _check_penta(mat_flat) - shift_banded(mat_flat, copy=False) - else: - mat_flat = create_banded(mat, col_wise=False, dtype=np.double) - rhs = np.asarray(rhs, dtype=np.double) - try: - return penta_solver1(mat_flat, rhs) - except ZeroDivisionError: - warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.") - return np.full_like(rhs, np.nan) - elif solver in [2, "2", "PTRANS-II"]: - if is_flat and index_row_wise: - mat_flat = np.asarray(mat, dtype=np.double) - _check_penta(mat_flat) - elif is_flat: - mat_flat = np.array(mat, dtype=np.double) - _check_penta(mat_flat) - shift_banded(mat_flat, copy=False) - else: - mat_flat = create_banded(mat, col_wise=False, dtype=np.double) - rhs = np.asarray(rhs, dtype=np.double) - try: - return penta_solver2(mat_flat, rhs) - except ZeroDivisionError: - warnings.warn("pentapy: PTRANS-II not suitable for input-matrix.") - return np.full_like(rhs, np.nan) - elif solver in [3, "3", "lapack", "solve_banded"]: # pragma: no cover + result : :class:`numpy.ndarray` of shape (m,) or (m, n) + Solution of the equation system with the same shape as ``rhs``. + + Raises + ------ + ValueError + If there is a shape mismatch between the number of equations in the left-hand + side matrix and the number of right-hand sides. + + """ # pylint: disable=C0301 + + # first, the solver is converted to the internal name to avoid confusion + solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[ # pylint: disable=W0212 + str(solver).lower() + ] + + # Case 1: the pentapy solvers + if solver_inter in { + pmodels.PentaSolverAliases.PTRANS_I, + pmodels.PentaSolverAliases.PTRANS_II, + }: + + return _solve_with_ptrans( + mat=mat, + rhs=rhs, + is_flat=is_flat, + index_row_wise=index_row_wise, + num_threads=num_threads, + solver_inter=solver_inter, + ) + + # Case 2: LAPACK's banded solver + if solver_inter == pmodels.PentaSolverAliases.LAPACK: try: from scipy.linalg import solve_banded except ImportError as imp_err: # pragma: no cover msg = "pentapy.solve: scipy.linalg.solve_banded could not be imported" raise ValueError(msg) from imp_err + if is_flat and index_row_wise: - mat_flat = np.array(mat) - _check_penta(mat_flat) - shift_banded(mat_flat, col_to_row=False, copy=False) + mat_flat = np.array(mat) # NOTE: this is a copy + ptools._check_penta(mat_flat) # pylint: disable=W0212 + ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: mat_flat = np.asarray(mat) else: - mat_flat = create_banded(mat) - return solve_banded((2, 2), mat_flat, rhs) - elif solver in [4, "4", "spsolve"]: # pragma: no cover + mat_flat = ptools.create_banded(mat) + + # NOTE: since this is a general banded solver, the number of sub- and super- + # diagonals has to be provided + # NOTE: LAPACK handles all the reshaping and flattening internally try: - from scipy import sparse as sps - from scipy.sparse.linalg import spsolve - except ImportError as imp_err: - msg = "pentapy.solve: scipy.sparse could not be imported" - raise ValueError(msg) from imp_err - if is_flat and index_row_wise: - mat_flat = np.array(mat) - _check_penta(mat_flat) - shift_banded(mat_flat, col_to_row=False, copy=False) - elif is_flat: - mat_flat = np.asarray(mat) - else: - mat_flat = create_banded(mat) - size = mat_flat.shape[1] - M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc") - return spsolve(M, rhs, use_umfpack=False) - elif solver in [ - 5, - "5", - "spsolve_umf", - "umf", - "umf_pack", - ]: # pragma: no cover + return solve_banded( + l_and_u=(2, 2), + ab=mat_flat, + b=rhs, + ) + + except np.linalg.LinAlgError: + warnings.warn("pentapy: LAPACK solver encountered singular matrix.") + return np.full(shape=rhs.shape, fill_value=np.nan) + + # Case 3: SciPy's sparse solver with or without UMFPACK + if solver_inter in { + pmodels.PentaSolverAliases.SUPER_LU, + pmodels.PentaSolverAliases.UMFPACK, + }: try: from scipy import sparse as sps from scipy.sparse.linalg import spsolve - except ImportError as imp_err: + except ImportError as imp_err: # pragma: no cover msg = "pentapy.solve: scipy.sparse could not be imported" raise ValueError(msg) from imp_err + if is_flat and index_row_wise: - mat_flat = np.array(mat) - _check_penta(mat_flat) - shift_banded(mat_flat, col_to_row=False, copy=False) + mat_flat = np.array(mat) # NOTE: this is a copy + ptools._check_penta(mat_flat) # pylint: disable=W0212 + ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: mat_flat = np.asarray(mat) else: - mat_flat = create_banded(mat) + mat_flat = ptools.create_banded(mat) + + # the solvers require a sparse left-hand side matrix, so this is created here + # NOTE: the UMFPACK solver will not be triggered for multiple right-hand sides + use_umfpack = solver_inter == pmodels.PentaSolverAliases.UMFPACK size = mat_flat.shape[1] - M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc") - return spsolve(M, rhs, use_umfpack=True) - else: # pragma: no cover - msg = f"pentapy.solve: unknown solver ({solver})" - raise ValueError(msg) + M = sps.spdiags( + data=mat_flat, + diags=[2, 1, 0, -1, -2], + m=size, + n=size, + format="csc", + ) + + sol = spsolve( + A=M, + b=rhs, + use_umfpack=use_umfpack, + ) + + # NOTE: spsolve flattens column-vectors, thus their shape has to be restored + # NOTE: it already fills the result vector with NaNs if the matrix is singular + if rhs.ndim == 2 and 1 in rhs.shape: + sol = sol[::, np.newaxis] + + return sol + + # Case 4: unknown solver + msg = f"pentapy.solve: unknown solver ({solver})" # pragma: no cover + raise ValueError(msg) # pragma: no cover diff --git a/src/pentapy/errors.py b/src/pentapy/errors.py new file mode 100644 index 0000000..8479cd3 --- /dev/null +++ b/src/pentapy/errors.py @@ -0,0 +1,33 @@ +""" +Auxiliary errors for the pentapy package. + +""" + +# === Imports === + +from enum import Enum + + +class PentaPyErrorMessages(str, Enum): + """ + Defines the possible error messages for the pentapy package, namely + + - ``SINGULAR_MATRIX``: the matrix is singular + - ``SHAPE_MISMATCH``: the shape of the input arrays is incorrect + - ``WRONG_SOLVER``: the solver alias is incorrect on C-level (internal error, + should not occur) + - ``UNKNOWN_ERROR``: an unknown error occurred + + """ + + SINGULAR_MATRIX = ( + "pentapy: {solver_inter_name} solver encountered singular matrix at " + "row index {row_idx}. Returning NaNs." + ) + SHAPE_MISMATCH = ( + "pentapy.solve: shape mismatch between the number of equations in the " + "left-hand side matrix ({lhs_n_cols}) and the number of right-hand sides " + "({rhs_n_rows})." + ) + WRONG_SOLVER = "pentapy.solve: failure in determining the solver internally." + UNKNOWN_ERROR = "pentapy.solve: unknown error in the pentadiagonal solver." diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd index e7c471e..ec7adc3 100644 --- a/src/pentapy/solver.pxd +++ b/src/pentapy/solver.pxd @@ -1,4 +1,14 @@ # cython: language_level=3 -cdef double[:] c_penta_solver1(double[:, :] mat_flat, double[:] rhs) +cdef double[::, ::1] c_penta_solver1( + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, + int num_threads, + int* info, +) -cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs) +cdef double[::, ::1] c_penta_solver2( + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, + int num_threads, + int* info, +) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 469b074..11db625 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -1,120 +1,780 @@ # cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True + """ This is a solver linear equation systems with a penta-diagonal matrix, -implemented in cython. -""" -import numpy as np - -cimport numpy as np - - -def penta_solver1(double[:, :] mat_flat, double[:] rhs): - return np.asarray(c_penta_solver1(mat_flat, rhs)) - - -def penta_solver2(double[:, :] mat_flat, double[:] rhs): - return np.asarray(c_penta_solver2(mat_flat, rhs)) +implemented in Cython. +""" -cdef double[:] c_penta_solver1(double[:, :] mat_flat, double[:] rhs): - - cdef int mat_j = mat_flat.shape[1] - - cdef double[:] result = np.zeros(mat_j) - - cdef double[:] al = np.zeros(mat_j) - cdef double[:] be = np.zeros(mat_j) - cdef double[:] ze = np.zeros(mat_j) - cdef double[:] ga = np.zeros(mat_j) - cdef double[:] mu = np.zeros(mat_j) - - cdef int i - - mu[0] = mat_flat[2, 0] - al[0] = mat_flat[1, 0] / mu[0] - be[0] = mat_flat[0, 0] / mu[0] - ze[0] = rhs[0] / mu[0] +# Imports - ga[1] = mat_flat[3, 1] - mu[1] = mat_flat[2, 1] - al[0] * ga[1] - al[1] = (mat_flat[1, 1] - be[0] * ga[1]) / mu[1] - be[1] = mat_flat[0, 1] / mu[1] - ze[1] = (rhs[1] - ze[0] * ga[1]) / mu[1] +import numpy as np - for i in range(2, mat_j-2): - ga[i] = mat_flat[3, i] - al[i-2] * mat_flat[4, i] - mu[i] = mat_flat[2, i] - be[i-2] * mat_flat[4, i] - al[i-1] * ga[i] - al[i] = (mat_flat[1, i] - be[i-1] * ga[i]) / mu[i] - be[i] = mat_flat[0, i] / mu[i] - ze[i] = (rhs[i] - ze[i-2] * mat_flat[4, i] - ze[i-1] * ga[i]) / mu[i] +cimport numpy as np +from cython cimport view - ga[mat_j-2] = mat_flat[3, mat_j-2] - al[mat_j-4] * mat_flat[4, mat_j-2] - mu[mat_j-2] = mat_flat[2, mat_j-2] - be[mat_j-4] * mat_flat[4, mat_j-2] - al[mat_j-3] * ga[mat_j-2] - al[mat_j-2] = (mat_flat[1, mat_j-2] - be[mat_j-3] * ga[mat_j-2]) / mu[mat_j-2] +from cython.parallel import prange - ga[mat_j-1] = mat_flat[3, mat_j-1] - al[mat_j-3] * mat_flat[4, mat_j-1] - mu[mat_j-1] = mat_flat[2, mat_j-1] - be[mat_j-3] * mat_flat[4, mat_j-1] - al[mat_j-2] * ga[mat_j-1] +from libc.stdint cimport int64_t - ze[mat_j-2] = (rhs[mat_j-2] - ze[mat_j-4] * mat_flat[4, mat_j-2] - ze[mat_j-3] * ga[mat_j-2]) / mu[mat_j-2] - ze[mat_j-1] = (rhs[mat_j-1] - ze[mat_j-3] * mat_flat[4, mat_j-1] - ze[mat_j-2] * ga[mat_j-1]) / mu[mat_j-1] +# NOTE: OPENMP is set during setup +if OPENMP: + cimport openmp - # Backward substitution - result[mat_j-1] = ze[mat_j-1] - result[mat_j-2] = ze[mat_j-2] - al[mat_j-2] * result[mat_j-1] +# === Optional Setup of OpenMP === - for i in range(mat_j-3, -1, -1): - result[i] = ze[i] - al[i] * result[i+1] - be[i] * result[i+2] - return result +def get_c_num_threads(num_threads): + # if the thread number was speficied explicitly, it needs to be sanitized + if num_threads is not None: + if num_threads >= 0: + return max(1, num_threads) + elif OPENMP: # negative numbers result in maximum thread number with OPENMP + return openmp.omp_get_max_threads() -cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs): + # without OpenMP, the number of threads is set to 1 in the final return - cdef int mat_j = mat_flat.shape[1] + # if None, the maximum thread number is retrieved with OPENMP if available + # NOTE: OPENMP is set during setup + if OPENMP: + return openmp.omp_get_num_procs() - cdef double[:] result = np.zeros(mat_j) + # if no threads were set so far, the number of threads is set to 1 (serial mode) + return 1 - cdef double[:] ps = np.zeros(mat_j) # psi - cdef double[:] si = np.zeros(mat_j) # sigma - cdef double[:] ph = np.zeros(mat_j) # phi - cdef double[:] ro = np.zeros(mat_j) # rho - cdef double[:] we = np.zeros(mat_j) # w - cdef int i +# === Constants === - ps[mat_j-1] = mat_flat[2, mat_j-1] - si[mat_j-1] = mat_flat[3, mat_j-1] / ps[mat_j-1] - ph[mat_j-1] = mat_flat[4, mat_j-1] / ps[mat_j-1] - we[mat_j-1] = rhs[mat_j-1] / ps[mat_j-1] +cdef enum: MAT_FACT_N_COLS = 5 - ro[mat_j-2] = mat_flat[1, mat_j-2] - ps[mat_j-2] = mat_flat[2, mat_j-2] - si[mat_j-1] * ro[mat_j-2] - si[mat_j-2] = (mat_flat[3, mat_j-2] - ph[mat_j-1] * ro[mat_j-2]) / ps[mat_j-2] - ph[mat_j-2] = mat_flat[4, mat_j-2] / ps[mat_j-2] - we[mat_j-2] = (rhs[mat_j-2] - we[mat_j-1] * ro[mat_j-2]) / ps[mat_j-2] +cdef enum Solvers: + PTRRANS_1 = 1 + PTRRANS_2 = 2 - for i in range(mat_j-3, 1, -1): - ro[i] = mat_flat[1, i] - si[i+2] * mat_flat[0, i] - ps[i] = mat_flat[2, i] - ph[i+2] * mat_flat[0, i] - si[i+1] * ro[i] - si[i] = (mat_flat[3, i] - ph[i+1] * ro[i]) / ps[i] - ph[i] = mat_flat[4, i] / ps[i] - we[i] = (rhs[i] - we[i+2] * mat_flat[0, i] - we[i+1] * ro[i]) / ps[i] +cdef enum Infos: + SUCCESS = 0 + SHAPE_MISMATCH = -1 + WRONG_SOLVER = -2 - ro[1] = mat_flat[1, 1] - si[3] * mat_flat[0, 1] - ps[1] = mat_flat[2, 1] - ph[3] * mat_flat[0, 1] - si[2] * ro[1] - si[1] = (mat_flat[3, 1] - ph[2] * ro[1]) / ps[1] +# === Main Python Interface === - ro[0] = mat_flat[1, 0] - si[2] * mat_flat[0, 0] - ps[0] = mat_flat[2, 0] - ph[2] * mat_flat[0, 0] - si[1] * ro[0] - we[1] = (rhs[1] - we[3] * mat_flat[0, 1] - we[2] * ro[1]) / ps[1] - we[0] = (rhs[0] - we[2] * mat_flat[0, 0] - we[1] * ro[0]) / ps[0] +def penta_solver1( + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, + num_threads=None, +): - # Foreward substitution - result[0] = we[0] - result[1] = we[1] - si[1] * result[0] + # NOTE: info is defined to be overwritten for possible future validations + cdef int info - for i in range(2, mat_j): - result[i] = we[i] - si[i] * result[i-1] - ph[i] * result[i-2] + num_threads_c = get_c_num_threads(num_threads) + return ( + np.asarray( + c_penta_solver1( + mat_flat, + rhs, + num_threads_c, + &info, + ) + ), + info, + ) - return result + +def penta_solver2( + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, + num_threads=None, +): + + # NOTE: info is defined to be overwritten for possible future validations + cdef int info + + num_threads_c = get_c_num_threads(num_threads) + return ( + np.asarray( + c_penta_solver2( + mat_flat, + rhs, + num_threads_c, + &info, + ) + ), + info, + ) + + +# === Solver Algorithm 1 === + +cdef double[::, ::1] c_penta_solver1( + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, + int num_threads, + int* info, +): + """ + Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and + the right-hand side ``b`` by + + - factorizing the matrix ``A`` into auxiliary coefficients and a unit upper + triangular matrix ``U`` + - transforming the right-hand side into a vector ``zeta`` + - solving the system of equations ``Ux = zeta`` by backward substitution + + """ + + # --- Initial checks --- + + # if the number of columns in the flattened matrix is not equal to the number of + # rows in the right-hand side, the function exits early to avoid memory errors + if mat_flat.shape[1] != rhs.shape[0]: + info[0] = Infos.SHAPE_MISMATCH + return np.empty_like(rhs) + + # --- Solving the system of equations --- + + # first, the matrix is factorized + cdef double[::, ::1] mat_factorized = _c_interf_factorize( + mat_flat, + info, + Solvers.PTRRANS_1, + ) + + # in case of an error during factorization, the function exits early + if info[0] != Infos.SUCCESS: + return np.empty_like(rhs) + + # then, all the right-hand sides are solved + return _c_interf_factorize_solve( + mat_factorized, + rhs, + num_threads, + info, + Solvers.PTRRANS_1, + ) + + +cdef double[::, ::1] _c_interf_factorize( + const double[::, ::1] mat_flat, + int* info, + int solver, +): + """ + This function serves as the interface that takes the memoryview of the flattened + matrix and returns the freshly allocated factorized matrix. + + """ + + # --- Variable declarations --- + + cdef int64_t mat_n_cols = mat_flat.shape[1] + tmp = view.array( + shape=(mat_n_cols, MAT_FACT_N_COLS), + itemsize=sizeof(double), + format="d", + ) + cdef double[::, ::1] mat_factorized = tmp + + # --- Factorization --- + + # the solver algorithm is chosen based on the input parameter + # Case 1: PTRRANS-I + if solver == Solvers.PTRRANS_1: + info[0] = _c_core_factorize_algo_1( + &mat_flat[0, 0], + mat_n_cols, + &mat_factorized[0, 0], + ) + return mat_factorized + + # Case 2: PTRRANS-II + elif solver == Solvers.PTRRANS_2: + info[0] = _c_core_factorize_algo_2( + &mat_flat[0, 0], + mat_n_cols, + &mat_factorized[0, 0], + ) + return mat_factorized + + # Case 3: the wrong solver is chosen + else: + info[0] = Infos.WRONG_SOLVER + return mat_factorized + + +cdef double[::, ::1] _c_interf_factorize_solve( + double[::, ::1] mat_factorized, + const double[::, ::1] rhs, + int num_threads, + int* info, + int solver, +): + """ + This function serves as the interface that takes the factorized matrix and the + right-hand sides and returns the freshly allocated solution vector obtained by + solving the system of equations via backward substitution. + + """ + + # --- Variable declarations --- + + cdef int64_t mat_n_cols = mat_factorized.shape[0] + cdef int64_t rhs_n_cols = rhs.shape[1] + cdef int64_t iter_col + tmp = view.array( + shape=(mat_n_cols, rhs_n_cols), + itemsize=sizeof(double), + format="d", + ) + cdef double[::, ::1] result = tmp + + # --- Solving the system of equations --- + + # the solver algorithm is chosen based on the input parameter + # Case 1: PTRRANS-I + if solver == Solvers.PTRRANS_1: + for iter_col in prange( + rhs_n_cols, + nogil=True, + num_threads=num_threads, + ): + info[0] = _c_core_factorize_solve_algo_1( + mat_n_cols, + &mat_factorized[0, 0], + &rhs[0, iter_col], + rhs_n_cols, + &result[0, iter_col], + ) + + return result + + # Case 2: PTRRANS-II + elif solver == Solvers.PTRRANS_2: + for iter_col in prange( + rhs_n_cols, + nogil=True, + num_threads=num_threads, + ): + info[0] = _c_core_factorize_solve_algo_2( + mat_n_cols, + &mat_factorized[0, 0], + &rhs[0, iter_col], + rhs_n_cols, + &result[0, iter_col], + ) + + return result + + # Case 3: the wrong solver is chosen + else: + info[0] = Infos.WRONG_SOLVER + return result + + +cdef int _c_core_factorize_algo_1( + double* mat_flat, + int64_t mat_n_cols, + double* mat_factorized, +): + """ + Factorizes the pentadiagonal matrix ``A`` into + + - auxiliary coefficients ``e``, ``mu`` and ``gamma`` (``ga``) for the transformation + of the right-hand side + - a unit upper triangular matrix with the main diagonals ``alpha``(``al``) and + ``beta`` (``be``) for the following backward substitution. Its unit main + diagonal is implicit. + + They are overwriting the memoryview ``mat_factorized`` as follows: + + ```bash + [[ * * mu_0 al_0 be_0 ] + [ * ga_1 mu_1 al_1 be_1 ] + [ e_2 ga_2 mu_2 al_2 be_2 ] + ... + [ e_i ga_i mu_i al_i be_i ] + [ e_{n-3} ga_{n-3} mu_{n-3} al_{n-3} be_{n-3} ] ... + [ e_{n-2} ga_{n-2} mu_{n-2} al_{n-2} * ] + [ e_{n-1} ga_{n-1} mu_{n-1} * * ]] + ``` + + where the entries marked with ``*`` are not used by design, but overwritten with + zeros. + + """ + + # --- Variable declarations --- + + cdef int64_t iter_row, fact_curr_base_idx + cdef int64_t mat_row_base_idx_1 = mat_n_cols # base index for the second row + cdef int64_t mat_row_base_idx_2 = 2 * mat_n_cols # base index for the third row + cdef int64_t mat_row_base_idx_3 = 3 * mat_n_cols # base index for the fourth row + cdef int64_t mat_row_base_idx_4 = 4 * mat_n_cols # base index for the fifth row + cdef double mu_i, ga_i, e_i # mu, gamma, e + cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha + cdef double be_i, be_i_minus_1, be_i_plus_1 # beta + + # --- Factorization --- + + # NOTE: in the following mu is manually checked for zero-division to extract the + # proper value of ``info`` and exit early in case of failure; + # ``info`` is set to the row count where the error occured as for LAPACK ``pbtrf`` + + # First row + mu_i = mat_flat[mat_row_base_idx_2] + if mu_i == 0.0: + return 1 + + al_i_minus_1 = mat_flat[mat_row_base_idx_1] / mu_i + be_i_minus_1 = mat_flat[0] / mu_i + + mat_factorized[0] = 0.0 + mat_factorized[1] = 0.0 + mat_factorized[2] = mu_i + mat_factorized[3] = al_i_minus_1 + mat_factorized[4] = be_i_minus_1 + + # Second row + ga_i = mat_flat[mat_row_base_idx_3 + 1] + mu_i = mat_flat[mat_row_base_idx_2 + 1] - al_i_minus_1 * ga_i + if mu_i == 0.0: + return 2 + + al_i = (mat_flat[mat_row_base_idx_1 + 1] - be_i_minus_1 * ga_i) / mu_i + be_i = mat_flat[1] / mu_i + + mat_factorized[5] = 0.0 + mat_factorized[6] = ga_i + mat_factorized[7] = mu_i + mat_factorized[8] = al_i + mat_factorized[9] = be_i + + # Central rows + fact_curr_base_idx = 10 + for iter_row in range(2, mat_n_cols-2): + e_i = mat_flat[mat_row_base_idx_4 + iter_row] + ga_i = mat_flat[mat_row_base_idx_3 + iter_row] - al_i_minus_1 * e_i + mu_i = mat_flat[mat_row_base_idx_2 + iter_row] - be_i_minus_1 * e_i - al_i * ga_i + if mu_i == 0.0: + return iter_row + 1 + + al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + iter_row] - be_i * ga_i) / mu_i + al_i_minus_1 = al_i + al_i = al_i_plus_1 + + be_i_plus_1 = mat_flat[iter_row] / mu_i + be_i_minus_1 = be_i + be_i = be_i_plus_1 + + mat_factorized[fact_curr_base_idx] = e_i + mat_factorized[fact_curr_base_idx + 1] = ga_i + mat_factorized[fact_curr_base_idx + 2] = mu_i + mat_factorized[fact_curr_base_idx + 3] = al_i + mat_factorized[fact_curr_base_idx + 4] = be_i + + fact_curr_base_idx += MAT_FACT_N_COLS + + # Second to last row + e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2] + ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - al_i_minus_1 * e_i + mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - be_i_minus_1 * e_i - al_i * ga_i + if mu_i == 0.0: + return mat_n_cols - 1 + + al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] - be_i * ga_i) / mu_i + + mat_factorized[fact_curr_base_idx] = e_i + mat_factorized[fact_curr_base_idx + 1] = ga_i + mat_factorized[fact_curr_base_idx + 2] = mu_i + mat_factorized[fact_curr_base_idx + 3] = al_i_plus_1 + mat_factorized[fact_curr_base_idx + 4] = 0.0 + + # Last Row + e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1] + ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] - al_i * e_i + mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1] - be_i * e_i - al_i_plus_1 * ga_i + if mu_i == 0.0: + return mat_n_cols + + mat_factorized[fact_curr_base_idx + 5] = e_i + mat_factorized[fact_curr_base_idx + 6] = ga_i + mat_factorized[fact_curr_base_idx + 7] = mu_i + mat_factorized[fact_curr_base_idx + 8] = 0.0 + mat_factorized[fact_curr_base_idx + 9] = 0.0 + + return 0 + + +cdef int _c_core_factorize_solve_algo_1( + int64_t mat_n_cols, + double* mat_factorized, + double* rhs_single, + int64_t rhs_n_cols, + double* result_view, +) noexcept nogil: + """ + Solves the pentadiagonal system of equations ``Ax = b`` with the factorized + unit upper triangular matrix ``U`` and the right-hand side ``b``. + It overwrites the right-hand side ``b`` first with the transformed vector ``zeta`` + and then with the solution vector ``x`` for ``Ux = zeta``. + + """ + + # --- Variable declarations --- + + cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx + cdef double ze_i, ze_i_minus_1, ze_i_plus_1 # zeta + + # --- Transformation --- + + # first, the right-hand side is transformed into the vector ``zeta`` + # First row + + ze_i_minus_1 = rhs_single[0] / mat_factorized[2] + result_view[0] = ze_i_minus_1 + + # Second row + ze_i = (rhs_single[rhs_n_cols] - ze_i_minus_1 * mat_factorized[6]) / mat_factorized[7] + result_view[rhs_n_cols] = ze_i + + # Central rows + fact_curr_base_idx = 10 + res_curr_base_idx = rhs_n_cols + rhs_n_cols + + for iter_row in range(2, mat_n_cols-2): + ze_i_plus_1 = ( + rhs_single[res_curr_base_idx] + - ze_i_minus_1 * mat_factorized[fact_curr_base_idx] + - ze_i * mat_factorized[fact_curr_base_idx + 1] + ) / mat_factorized[fact_curr_base_idx + 2] + ze_i_minus_1 = ze_i + ze_i = ze_i_plus_1 + result_view[res_curr_base_idx] = ze_i_plus_1 + + fact_curr_base_idx += MAT_FACT_N_COLS + res_curr_base_idx += rhs_n_cols + + # Second to last row + ze_i_plus_1 = ( + rhs_single[res_curr_base_idx] + - ze_i_minus_1 * mat_factorized[fact_curr_base_idx] + - ze_i * mat_factorized[fact_curr_base_idx + 1] + ) / mat_factorized[fact_curr_base_idx + 2] + ze_i_minus_1 = ze_i + ze_i = ze_i_plus_1 + result_view[res_curr_base_idx] = ze_i_plus_1 + + # Last row + ze_i_plus_1 = ( + rhs_single[res_curr_base_idx + rhs_n_cols] + - ze_i_minus_1 * mat_factorized[fact_curr_base_idx + 5] + - ze_i * mat_factorized[fact_curr_base_idx + 6] + ) / mat_factorized[fact_curr_base_idx + 7] + result_view[res_curr_base_idx + rhs_n_cols] = ze_i_plus_1 + + # --- Backward substitution --- + + # The solution vector is calculated by backward substitution that overwrites the + # right-hand side vector with the solution vector + ze_i -= mat_factorized[fact_curr_base_idx + 3] * ze_i_plus_1 + result_view[res_curr_base_idx] = ze_i + + for iter_row in range(mat_n_cols-3, -1, -1): + fact_curr_base_idx -= MAT_FACT_N_COLS + res_curr_base_idx -= rhs_n_cols + + result_view[res_curr_base_idx] -= ( + mat_factorized[fact_curr_base_idx + 3] * ze_i + + mat_factorized[fact_curr_base_idx + 4] * ze_i_plus_1 + ) + ze_i_plus_1 = ze_i + ze_i = result_view[res_curr_base_idx] + + return 0 + +# === Solver Algorithm 2 === + + +cdef double[::, ::1] c_penta_solver2( + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, + int num_threads, + int* info, +): + """ + Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and + the right-hand side ``b`` by + + - factorizing the matrix ``A`` into auxiliary coefficients and a unit lower + triangular matrix ``L`` + - transforming the right-hand side into a vector ``omega`` + - solving the system of equations ``Lx = omega`` by backward substitution + + """ + + # --- Initial checks --- + + # if the number of columns in the flattened matrix is not equal to the number of + # rows in the right-hand side, the function exits early to avoid memory errors + if mat_flat.shape[1] != rhs.shape[0]: + info[0] = Infos.SHAPE_MISMATCH + return np.empty_like(rhs) + + # --- Solving the system of equations --- + + # first, the matrix is factorized + cdef double[::, ::1] mat_factorized = _c_interf_factorize( + mat_flat, + info, + Solvers.PTRRANS_2, + ) + + # in case of an error during factorization, the function exits early + if info[0] != Infos.SUCCESS: + return np.empty_like(rhs) + + # then, all the right-hand sides are solved + return _c_interf_factorize_solve( + mat_factorized, + rhs, + num_threads, + info, + Solvers.PTRRANS_2, + ) + + +cdef int _c_core_factorize_algo_2( + double* mat_flat, + int64_t mat_n_cols, + double* mat_factorized, +): + """ + Factorizes the pentadiagonal matrix ``A`` into + + - auxiliary coefficients ``psi`` (``ps``), ``rho`` and ``b`` for the transformation + of the right-hand side + - a unit lower triangular matrix with the main diagonals ``phi`` and ``sigma`` + (``si``) for the following forward substitution. Its unit main diagonal is + implicit. + + They are overwriting the memoryview ``mat_factorized`` as follows: + + ```bash + [[ * * ps_0 rho_0 b_i ] + [ * si_1 ps_1 rho_1 b_1 ] + [ phi_2 si_2 ps_2 rho_2 b_2 ] + ... + [ phi_i si_i ps_i rho_i b_i ] + ... + [ phi_{n-3} si_{n-3} ps_{n-3} rho_{n-3} b_{n-3} ] + [ phi_{n-2} si_{n-2} ps_{n-2} rho_{n-2} * ] + [ phi_{n-1} si_{n-1} ps_{n-1} * * ]] + ``` + + where the entries marked with ``*`` are not used by design, but overwritten with + zeros. + + """ + + # --- Variable declarations --- + + cdef int64_t iter_row, fact_curr_base_idx + cdef int64_t mat_row_base_idx_1 = mat_n_cols # base index for the second row + cdef int64_t mat_row_base_idx_2 = 2 * mat_n_cols # base index for the third row + cdef int64_t mat_row_base_idx_3 = 3 * mat_n_cols # base index for the fourth row + cdef int64_t mat_row_base_idx_4 = 4 * mat_n_cols # base index for the fifth row + cdef double ps_i, rho_i # psi, rho + cdef double si_i, si_i_minus_1, si_i_plus_1 # sigma + cdef double phi_i, phi_i_minus_1, phi_i_plus_1 # phi + + # --- Factorization --- + + # NOTE: in the following ps is manually checked for zero-division to extract the + # proper value of ``info`` and exit early in case of failure; + # ``info`` is set to the row count where the error occured as for LAPACK ``pbtrf`` + + # First row + + ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1] + if ps_i == 0.0: + return mat_n_cols + + si_i_plus_1 = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] / ps_i + phi_i_plus_1 = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1] / ps_i + + fact_curr_base_idx = (mat_n_cols - 1) * MAT_FACT_N_COLS + mat_factorized[fact_curr_base_idx + 4] = 0.0 + mat_factorized[fact_curr_base_idx + 3] = 0.0 + mat_factorized[fact_curr_base_idx + 2] = ps_i + mat_factorized[fact_curr_base_idx + 1] = si_i_plus_1 + mat_factorized[fact_curr_base_idx] = phi_i_plus_1 + + # Second row + rho_i = mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] + ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - si_i_plus_1 * rho_i + if ps_i == 0.0: + return mat_n_cols - 1 + + si_i = (mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - phi_i_plus_1 * rho_i) / ps_i + phi_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2] / ps_i + + fact_curr_base_idx -= MAT_FACT_N_COLS + mat_factorized[fact_curr_base_idx + 4] = 0.0 + mat_factorized[fact_curr_base_idx + 3] = rho_i + mat_factorized[fact_curr_base_idx + 2] = ps_i + mat_factorized[fact_curr_base_idx + 1] = si_i + mat_factorized[fact_curr_base_idx] = phi_i + + # Central rows + for iter_row in range(mat_n_cols - 3, 1, -1): + b_i = mat_flat[iter_row] + rho_i = mat_flat[mat_row_base_idx_1 + iter_row] - si_i_plus_1 * b_i + ps_i = mat_flat[mat_row_base_idx_2 + iter_row] - phi_i_plus_1 * b_i - si_i * rho_i + if ps_i == 0.0: + return iter_row + 1 + + si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + iter_row] - phi_i * rho_i) / ps_i + si_i_plus_1 = si_i + si_i = si_i_minus_1 + phi_i_minus_1 = mat_flat[mat_row_base_idx_4 + iter_row] / ps_i + phi_i_plus_1 = phi_i + phi_i = phi_i_minus_1 + + fact_curr_base_idx -= MAT_FACT_N_COLS + mat_factorized[fact_curr_base_idx + 4] = b_i + mat_factorized[fact_curr_base_idx + 3] = rho_i + mat_factorized[fact_curr_base_idx + 2] = ps_i + mat_factorized[fact_curr_base_idx + 1] = si_i + mat_factorized[fact_curr_base_idx] = phi_i + + # Second to last row + b_i = mat_flat[1] + rho_i = mat_flat[mat_row_base_idx_1 + 1] - si_i_plus_1 * b_i + ps_i = mat_flat[mat_row_base_idx_2 + 1] - phi_i_plus_1 * b_i - si_i * rho_i + if ps_i == 0.0: + return 2 + + si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + 1] - phi_i * rho_i) / ps_i + si_i_plus_1 = si_i + si_i = si_i_minus_1 + + mat_factorized[9] = b_i + mat_factorized[8] = rho_i + mat_factorized[7] = ps_i + mat_factorized[6] = si_i + mat_factorized[5] = 0.0 + + # Last row + b_i = mat_flat[0] + rho_i = mat_flat[mat_row_base_idx_1 + 0] - si_i_plus_1 * b_i + ps_i = mat_flat[mat_row_base_idx_2 + 0] - phi_i * b_i - si_i * rho_i + if ps_i == 0.0: + return 1 + + mat_factorized[4] = b_i + mat_factorized[3] = rho_i + mat_factorized[2] = ps_i + mat_factorized[1] = 0.0 + mat_factorized[0] = 0.0 + + return 0 + + +cdef int _c_core_factorize_solve_algo_2( + int64_t mat_n_cols, + double* mat_factorized, + double* rhs_single, + int64_t rhs_n_cols, + double* result_view, +) noexcept nogil: + + """ + Solves the pentadiagonal system of equations ``Ax = b`` with the factorized + unit lower triangular matrix ``L`` and the right-hand side ``b``. + It overwrites the right-hand side ``b`` first with the transformed vector ``omega`` + and then with the solution vector ``x`` for ``Lx = omega``. + + """ + + # --- Variable declarations --- + + cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx + cdef double om_i, om_i_minus_1, om_i_plus_1 # omega + + # --- Transformation --- + + # first, the right-hand side is transformed into the vector ``omega`` + # First row + fact_curr_base_idx = (mat_n_cols - 1) * MAT_FACT_N_COLS + res_curr_base_idx = (mat_n_cols - 1) * rhs_n_cols + + om_i_plus_1 = rhs_single[res_curr_base_idx] / mat_factorized[fact_curr_base_idx + 2] + result_view[res_curr_base_idx] = om_i_plus_1 + + # Second row + fact_curr_base_idx -= MAT_FACT_N_COLS + res_curr_base_idx -= rhs_n_cols + + om_i = ( + rhs_single[res_curr_base_idx] + - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 3] + ) / mat_factorized[fact_curr_base_idx + 2] + result_view[res_curr_base_idx] = om_i + + # Central rows + for iter_row in range(mat_n_cols - 3, 1, -1): + fact_curr_base_idx -= MAT_FACT_N_COLS + res_curr_base_idx -= rhs_n_cols + + om_i_minus_1 = ( + rhs_single[res_curr_base_idx] + - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 4] + - om_i * mat_factorized[fact_curr_base_idx + 3] + ) / mat_factorized[fact_curr_base_idx + 2] + om_i_plus_1 = om_i + om_i = om_i_minus_1 + result_view[res_curr_base_idx] = om_i + + # Second to last row + fact_curr_base_idx -= MAT_FACT_N_COLS + res_curr_base_idx -= rhs_n_cols + + om_i_minus_1 = ( + rhs_single[res_curr_base_idx] + - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 4] + - om_i * mat_factorized[fact_curr_base_idx + 3] + ) / mat_factorized[fact_curr_base_idx + 2] + om_i_plus_1 = om_i + om_i = om_i_minus_1 + result_view[res_curr_base_idx] = om_i + + # Last row + om_i_minus_1 = ( + rhs_single[0] + - om_i_plus_1 * mat_factorized[4] + - om_i * mat_factorized[3] + ) / mat_factorized[2] + result_view[0] = om_i_minus_1 + + # --- Forward substitution --- + + # The solution vector is calculated by forward substitution that overwrites the + # right-hand side vector with the solution vector + om_i -= mat_factorized[fact_curr_base_idx + 1] * om_i_minus_1 + result_view[res_curr_base_idx] = om_i + + for iter_row in range(2, mat_n_cols): + fact_curr_base_idx += MAT_FACT_N_COLS + res_curr_base_idx += rhs_n_cols + + result_view[res_curr_base_idx] = ( + result_view[res_curr_base_idx] + - mat_factorized[fact_curr_base_idx] * om_i_minus_1 + - mat_factorized[fact_curr_base_idx + 1] * om_i + ) + om_i_minus_1 = om_i + om_i = result_view[res_curr_base_idx] + + return 0 diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py index 3db7126..fc5b29e 100644 --- a/src/pentapy/tools.py +++ b/src/pentapy/tools.py @@ -14,11 +14,19 @@ create_full """ -# pylint: disable=C0103 +# === Imports === + +from typing import Optional, Tuple, Type + import numpy as np +# === Functions === + -def diag_indices(n, offset=0): +def diag_indices( + n: int, + offset: int = 0, +) -> Tuple[np.ndarray, np.ndarray]: """ Get indices for the main or minor diagonals of a matrix. @@ -28,17 +36,17 @@ def diag_indices(n, offset=0): Parameters ---------- - n : int + n : :class:`int` The size, along each dimension, of the arrays for which the returned indices can be used. - offset : int, optional - The diagonal offset. + offset : :class:`int`, optional + The diagonal offset. Default: 0 Returns ------- - idx : :class:`numpy.ndarray` + idx : :class:`numpy.ndarray` of shape (n - abs(offset),) row indices - idy : :class:`numpy.ndarray` + idy : :class:`numpy.ndarray` of shape (n - abs(offset),) col indices """ @@ -47,13 +55,20 @@ def diag_indices(n, offset=0): return idx, idy -def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): - """Shift rows of a banded matrix. +def shift_banded( + mat: np.ndarray, + up: int = 2, + low: int = 2, + col_to_row: bool = True, + copy: bool = True, +) -> np.ndarray: + """ + Shift rows of a banded matrix. Either from column-wise to row-wise storage or vice versa. - The Matrix has to be given as a flattend matrix. - Either in a column-wise flattend form:: + The Matrix has to be given as a flattened matrix. + Either in a column-wise flattened form:: [[0 0 Dup2[2] ... Dup2[N-2] Dup2[N-1] Dup2[N] ] [0 Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] Dup1[N] ] @@ -65,7 +80,7 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): col_to_row=True - Or in a row-wise flattend form:: + Or in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] [Dup1[0] Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] 0 ] @@ -83,11 +98,11 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): Parameters ---------- - mat : :class:`numpy.ndarray` + mat : :class:`numpy.ndarray` of shape (5, n) The Matrix or the flattened Version of the pentadiagonal matrix. - up : :class:`int` + up : :class:`int`, optional The number of upper minor-diagonals. Default: 2 - low : :class:`int` + low : :class:`int`, optional The number of lower minor-diagonals. Default: 2 col_to_row : :class:`bool`, optional Shift from column-wise to row-wise storage or vice versa. @@ -97,13 +112,19 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): Returns ------- - :class:`numpy.ndarray` - Shifted bandend matrix + :class:`numpy.ndarray` of shape (5, n) + Shifted banded matrix + """ + + # first, the matrix is copied if required if copy: mat_flat = np.copy(mat) else: mat_flat = mat + + # then, the shifting is performed + # Case 1: Column-wise to row-wise if col_to_row: for i in range(up): mat_flat[i, : -(up - i)] = mat_flat[i, (up - i) :] @@ -111,21 +132,32 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): for i in range(low): mat_flat[-i - 1, (low - i) :] = mat_flat[-i - 1, : -(low - i)] mat_flat[-i - 1, : (low - i)] = 0 - else: - for i in range(up): - mat_flat[i, (up - i) :] = mat_flat[i, : -(up - i)] - mat_flat[i, : (up - i)] = 0 - for i in range(low): - mat_flat[-i - 1, : -(low - i)] = mat_flat[-i - 1, (low - i) :] - mat_flat[-i - 1, -(low - i) :] = 0 + + return mat_flat + + # Case 2: Row-wise to column-wise + for i in range(up): + mat_flat[i, (up - i) :] = mat_flat[i, : -(up - i)] + mat_flat[i, : (up - i)] = 0 + for i in range(low): + mat_flat[-i - 1, : -(low - i)] = mat_flat[-i - 1, (low - i) :] + mat_flat[-i - 1, -(low - i) :] = 0 + return mat_flat -def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): - """Create a banded matrix from a given quadratic Matrix. +def create_banded( + mat: np.ndarray, + up: int = 2, + low: int = 2, + col_wise: bool = True, + dtype: Optional[Type] = None, +) -> np.ndarray: + """ + Create a banded matrix from a given square Matrix. - The Matrix will to be returned as a flattend matrix. - Either in a column-wise flattend form:: + The Matrix will to be returned as a flattened matrix. + Either in a column-wise flattened form:: [[0 0 Dup2[2] ... Dup2[N-2] Dup2[N-1] Dup2[N] ] [0 Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] Dup1[N] ] @@ -137,7 +169,7 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): col_wise=True - Or in a row-wise flattend form:: + Or in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] [Dup1[0] Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] 0 ] @@ -155,51 +187,74 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): Parameters ---------- - mat : :class:`numpy.ndarray` + mat : :class:`numpy.ndarray` of shape (n, n) The full (n x n) Matrix. - up : :class:`int` + up : :class:`int`, optional The number of upper minor-diagonals. Default: 2 - low : :class:`int` + low : :class:`int`, optional The number of lower minor-diagonals. Default: 2 col_wise : :class:`bool`, optional Use column-wise storage. If False, use row-wise storage. Default: ``True`` + dtype : :class:`type` or ``None``, optional + The data type of the returned matrix. If ``None``, the data type of the + input matrix is preserved. Default: ``None`` Returns ------- - :class:`numpy.ndarray` - Bandend matrix + :class:`numpy.ndarray` of shape (5, n) + Banded matrix + """ + + # first, the matrix is checked mat = np.asanyarray(mat) if mat.ndim != 2: - msg = "create_banded: matrix has to be 2D" + msg = f"create_banded: matrix has to be 2D, got {mat.ndim}D" raise ValueError(msg) + if mat.shape[0] != mat.shape[1]: - msg = "create_banded: matrix has to be n x n" + msg = ( + f"create_banded: matrix has to be n x n, " + f"got {mat.shape[0]} x {mat.shape[1]}" + ) raise ValueError(msg) + # then, the matrix is created + dtype = mat.dtype if dtype is None else dtype size = mat.shape[0] - mat_flat = np.zeros((5, size), dtype=dtype) + mat_flat = np.zeros(shape=(5, size), dtype=dtype) mat_flat[up, :] = mat.diagonal() + # Case 1: Column-wise storage if col_wise: for i in range(up): mat_flat[i, (up - i) :] = mat.diagonal(up - i) for i in range(low): mat_flat[-i - 1, : -(low - i)] = mat.diagonal(-(low - i)) - else: - for i in range(up): - mat_flat[i, : -(up - i)] = mat.diagonal(up - i) - for i in range(low): - mat_flat[-i - 1, (low - i) :] = mat.diagonal(-(low - i)) + + return mat_flat + + # Case 2: Row-wise storage + for i in range(up): + mat_flat[i, : -(up - i)] = mat.diagonal(up - i) + for i in range(low): + mat_flat[-i - 1, (low - i) :] = mat.diagonal(-(low - i)) + return mat_flat -def create_full(mat, up=2, low=2, col_wise=True): - """Create a (n x n) Matrix from a given banded matrix. +def create_full( + mat: np.ndarray, + up: int = 2, + low: int = 2, + col_wise: bool = True, +) -> np.ndarray: + """ + Create an (n x n) Matrix from a given banded matrix. - The given Matrix has to be a flattend matrix. - Either in a column-wise flattend form:: + The given Matrix has to be a flattened matrix. + Either in a column-wise flattened form:: [[0 0 Dup2[2] ... Dup2[N-2] Dup2[N-1] Dup2[N] ] [0 Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] Dup1[N] ] @@ -211,7 +266,7 @@ def create_full(mat, up=2, low=2, col_wise=True): col_wise=True - Or in a row-wise flattend form:: + Or in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] [Dup1[0] Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] 0 ] @@ -229,11 +284,11 @@ def create_full(mat, up=2, low=2, col_wise=True): Parameters ---------- - mat : :class:`numpy.ndarray` + mat : :class:`numpy.ndarray` of shape (5, n) The flattened Matrix. - up : :class:`int` + up : :class:`int`, optional The number of upper minor-diagonals. Default: 2 - low : :class:`int` + low : :class:`int`, optional The number of lower minor-diagonals. Default: 2 col_wise : :class:`bool`, optional Input is in column-wise storage. If False, use as row-wise storage. @@ -241,41 +296,60 @@ def create_full(mat, up=2, low=2, col_wise=True): Returns ------- - :class:`numpy.ndarray` + :class:`numpy.ndarray` of shape (n, n) Full matrix. + """ + + # first, the matrix is checked mat = np.asanyarray(mat) if mat.ndim != 2: - msg = "create_full: matrix has to be 2D" + msg = f"create_full: matrix has to be 2D, got {mat.ndim}D" raise ValueError(msg) + if mat.shape[0] != up + low + 1: - msg = "create_full: matrix has wrong count of bands" + msg = ( + f"create_full: matrix has wrong count of bands, required " + f"{up} + {low} + 1 = {up + low + 1}, got {mat.shape[0]} bands" + ) raise ValueError(msg) + if mat.shape[1] < max(up, low) + 1: - msg = "create_full: matrix has to few information" + msg = ( + f"create_full: matrix has to few information, required " + f"{max(up, low) + 1} columns, got {mat.shape[1]} columns" + ) raise ValueError(msg) + + # then, the matrix is created size = mat.shape[1] mat_full = np.diag(mat[up]) + + # Case 1: Column-wise storage if col_wise: for i in range(up): mat_full[diag_indices(size, up - i)] = mat[i, (up - i) :] for i in range(low): mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, : -(low - i)] - else: - for i in range(up): - mat_full[diag_indices(size, up - i)] = mat[i, : -(up - i)] - for i in range(low): - mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, (low - i) :] + + return mat_full + + # Case 2: Row-wise storage + for i in range(up): + mat_full[diag_indices(size, up - i)] = mat[i, : -(up - i)] + for i in range(low): + mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, (low - i) :] + return mat_full -def _check_penta(mat): +def _check_penta(mat: np.ndarray) -> None: if mat.ndim != 2: - msg = "pentapy: matrix has to be 2D" + msg = f"pentapy: matrix has to be 2D, got {mat.ndim}D" raise ValueError(msg) if mat.shape[0] != 5: - msg = "pentapy: matrix needs 5 bands" + msg = f"pentapy: matrix needs 5 bands, got {mat.shape[0]} bands" raise ValueError(msg) if mat.shape[1] < 3: - msg = "pentapy: matrix needs at least 3 rows" + msg = f"pentapy: matrix needs at least 3 rows, got {mat.shape[1]} rows" raise ValueError(msg) diff --git a/tests/templates.py b/tests/templates.py new file mode 100644 index 0000000..554b44f --- /dev/null +++ b/tests/templates.py @@ -0,0 +1,271 @@ +""" +This test suite implements reusable templates for testing the pentadiagonal solver based +on either Algorithm PTRANS-I or PTRANS-II. +""" + +# === Imports === + +from typing import Dict, Literal + +import numpy as np +import pentapy as pp +import pytest +import util_funcs as uf + +# === Constants === + +SEED = 19_031_977 +SINGULAR_WARNING_REF_CONTENT = "solver encountered singular matrix" +SHAPE_MISMATCH_ERROR_REF_CONTENT = "shape mismatch between the number of equations" +N_ROWS = [ + 3, # important edge case + 4, # important edge case + 5, # important edge case + 10, # even + 11, # odd + 50, # even + 51, # odd + 100, # ... + 101, + 1_000, + 1_001, +] +SOLVER_ALIASES_PTRANS_I = [1, "1", "pTrAnS-I"] +SOLVER_ALIASES_PTRANS_II = [2, "2", "pTrAnS-Ii"] + +PARAM_DICT = { + "n_rows": N_ROWS, + "n_rhs": [None, 1, 10], + "input_layout": ["full", "banded_row_wise", "banded_col_wise"], + "solver_alias": SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II, + "induce_error": [False, True], + "from_order": ["C", "F"], + "num_threads": [1], +} + +# === Auxiliary functions === + + +def convert_matrix_to_layout( + mat: np.ndarray, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], +) -> tuple[np.ndarray, Dict[str, bool]]: + """ + Converts a dense pentadiagonal matrix to the desired layout. + + """ + + if input_layout == "full": + return ( + mat, + dict(is_flat=False), + ) + + elif input_layout == "banded_row_wise": + return ( + pp.create_banded(mat, col_wise=False), + dict( + is_flat=True, + index_row_wise=True, + ), + ) + + elif input_layout == "banded_col_wise": + return ( + pp.create_banded(mat, col_wise=True), + dict( + is_flat=True, + index_row_wise=False, + ), + ) + + else: + raise ValueError(f"Invalid input layout: {input_layout}") + + +def convert_matrix_to_order( + mat: np.ndarray, + from_order: Literal["C", "F"], +) -> np.ndarray: + """ + Converts a dense pentadiagonal matrix to the desired order. + + """ + + if from_order == "C": + return np.ascontiguousarray(mat) + + elif from_order == "F": + return np.asfortranarray(mat) + + else: + raise ValueError(f"Invalid from order: {from_order=}") + + +# === Templates === + + +def pentapy_solvers_extended_template( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + induce_error: bool, + from_order: Literal["C", "F"], + num_threads: int, +) -> None: + """ + Tests the pentadiagonal solvers when starting from different input layouts, number + of right-hand sides, number of rows, and also when inducing an error by making the + first or last diagonal element exactly zero. + It has to be ensured that the edge case of ``n_rows = 3`` is also covered. + + For ``n_rows = 3``, the error is induced by initialising a matrix of zeros. + + """ + + # first, a random pentadiagonal matrix is generated + mat_full = uf.gen_conditioned_rand_penta_matrix_dense( + n_rows=n_rows, + seed=SEED, + ill_conditioned=False, + ) + + # an error is induced by setting the first or last diagonal element to zero + if induce_error: + # the induction of the error is only possible if the matrix does not have + # only 3 rows + if n_rows == 3: + mat_full = np.zeros_like(mat_full) + + elif solver_alias in SOLVER_ALIASES_PTRANS_I: + mat_full[0, 0] = 0.0 + else: + mat_full[n_rows - 1, n_rows - 1] = 0.0 + + # the right-hand side is generated + np.random.seed(SEED) + if n_rhs is not None: + rhs = np.random.rand(n_rows, n_rhs) + result_shape = (n_rows, n_rhs) + else: + rhs = np.random.rand(n_rows) + result_shape = (n_rows,) + + # the matrix is converted to the desired layout + mat, kwargs = convert_matrix_to_layout(mat_full, input_layout) + + # the left-hand side matrix and right-hand side is converted to the desired order + mat = convert_matrix_to_order(mat=mat, from_order=from_order) + rhs = convert_matrix_to_order(mat=rhs, from_order=from_order) + + # the solution is computed + # Case 1: in case of an error, a warning has to be issued and the result has to + # be NaN + if induce_error: + with pytest.warns(UserWarning, match=SINGULAR_WARNING_REF_CONTENT): + mat_ref_copy = mat.copy() + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + num_threads=num_threads, + **kwargs, + ) + + assert sol.shape == result_shape + assert np.isnan(sol).all() + assert np.array_equal(mat, mat_ref_copy) + + return + + # Case 2: in case of no error, the solution can be computed without any issues + mat_ref_copy = mat.copy() + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + num_threads=num_threads, + **kwargs, + ) + assert sol.shape == result_shape + assert np.array_equal(mat, mat_ref_copy) + + # if no error was induced, the reference solution is computed with SciPy + sol_ref = uf.solve_penta_matrix_dense_scipy( + mat=mat_full, + rhs=rhs, + ) + + # the solutions are compared + assert np.allclose(sol, sol_ref) + + return + + +def pentapy_solvers_shape_mismatch_template( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + from_order: Literal["C", "F"], + num_threads: int, +) -> None: + """ + Tests the pentadiagonal solvers when the shape of the right-hand side is incorrect, + starting from different input layouts, number of right-hand sides, and number of + rows. + + """ + + # first, a random pentadiagonal matrix is generated + mat_full = uf.gen_conditioned_rand_penta_matrix_dense( + n_rows=n_rows, + seed=SEED, + ill_conditioned=False, + ) + + # the right-hand side is generated with a wrong shape (rows + 10) + np.random.seed(SEED) + if n_rhs is not None: + rhs = np.random.rand(n_rows + 10, n_rhs) + else: + rhs = np.random.rand(n_rows + 10) + + # the matrix is converted to the desired layout + mat, kwargs = convert_matrix_to_layout(mat_full, input_layout) + + # the left-hand side matrix and right-hand side is converted to the desired order + mat = convert_matrix_to_order(mat=mat, from_order=from_order) + rhs = convert_matrix_to_order(mat=rhs, from_order=from_order) + + # the solution is computed, but due to the wrong shape of the right-hand side, an + # error has to be raised + with pytest.raises(ValueError, match=SHAPE_MISMATCH_ERROR_REF_CONTENT): + pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + num_threads=num_threads, + **kwargs, + ) + + return diff --git a/tests/test_pentapy.py b/tests/test_pentapy.py deleted file mode 100755 index 558de5e..0000000 --- a/tests/test_pentapy.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -This is the unittest for pentapy. -""" - -import unittest - -# import platform -import warnings - -import numpy as np - -import pentapy as pp - -warnings.simplefilter("always") - - -class TestPentapy(unittest.TestCase): - def setUp(self): - self.seed = 19031977 - self.size = 1000 - self.rand = np.random.RandomState(self.seed) - self.mat = (self.rand.rand(5, self.size) - 0.5) * 1e-5 - self.rhs = self.rand.rand(self.size) * 1e5 - - def test_tools(self): - self.mat_int = np.zeros((100, 100), dtype=int) - # fill bands of pentadiagonal matrix - self.mat_int[pp.diag_indices(100, 0)] = self.rand.randint(1, 1000, size=100) - self.mat_int[pp.diag_indices(100, 1)] = self.rand.randint(1, 1000, size=99) - self.mat_int[pp.diag_indices(100, 2)] = self.rand.randint(1, 1000, size=98) - self.mat_int[pp.diag_indices(100, -1)] = self.rand.randint(1, 1000, size=99) - self.mat_int[pp.diag_indices(100, -2)] = self.rand.randint(1, 1000, size=98) - # create banded - self.mat_int_col = pp.create_banded(self.mat_int) - self.mat_int_row = pp.create_banded(self.mat_int, col_wise=False) - # create full - self.mat_int_col_ful = pp.create_full(self.mat_int_col, col_wise=True) - self.mat_int_row_ful = pp.create_full(self.mat_int_row, col_wise=False) - # shifting - self.mat_shift_cr = pp.shift_banded(self.mat_int_col) - self.mat_shift_rc = pp.shift_banded(self.mat_int_row, col_to_row=False) - # in place shifting - self.mat_int_col_ip = pp.create_banded(self.mat_int) - self.mat_int_row_ip = pp.create_banded(self.mat_int, col_wise=False) - pp.shift_banded(self.mat_int_col_ip, copy=False) - pp.shift_banded(self.mat_int_row_ip, copy=False, col_to_row=False) - # checking - self.assertEqual(np.sum(self.mat_int > 0), 494) - self.assertTrue(np.array_equal(self.mat_int_col, self.mat_shift_rc)) - self.assertTrue(np.array_equal(self.mat_int_row, self.mat_shift_cr)) - self.assertTrue(np.array_equal(self.mat_int_col, self.mat_int_row_ip)) - self.assertTrue(np.array_equal(self.mat_int_row, self.mat_int_col_ip)) - self.assertTrue(np.array_equal(self.mat_int, self.mat_int_col_ful)) - self.assertTrue(np.array_equal(self.mat_int, self.mat_int_row_ful)) - - def test_solve1(self): - self.mat_col = pp.shift_banded(self.mat, col_to_row=False) - self.mat_ful = pp.create_full(self.mat, col_wise=False) - - sol_row = pp.solve(self.mat, self.rhs, is_flat=True, solver=1) - sol_col = pp.solve( - self.mat_col, - self.rhs, - is_flat=True, - index_row_wise=False, - solver=1, - ) - sol_ful = pp.solve(self.mat_ful, self.rhs, solver=1) - - diff_row = np.max(np.abs(np.dot(self.mat_ful, sol_row) - self.rhs)) - diff_col = np.max(np.abs(np.dot(self.mat_ful, sol_col) - self.rhs)) - diff_ful = np.max(np.abs(np.dot(self.mat_ful, sol_ful) - self.rhs)) - - diff_row_col = np.max(np.abs(self.mat_ful - pp.create_full(self.mat_col))) - self.assertAlmostEqual(diff_row * 1e-5, 0.0) - self.assertAlmostEqual(diff_col * 1e-5, 0.0) - self.assertAlmostEqual(diff_ful * 1e-5, 0.0) - self.assertAlmostEqual(diff_row_col * 1e5, 0.0) - - def test_solve2(self): - self.mat_col = pp.shift_banded(self.mat, col_to_row=False) - self.mat_ful = pp.create_full(self.mat, col_wise=False) - - sol_row = pp.solve(self.mat, self.rhs, is_flat=True, solver=2) - sol_col = pp.solve( - self.mat_col, - self.rhs, - is_flat=True, - index_row_wise=False, - solver=2, - ) - sol_ful = pp.solve(self.mat_ful, self.rhs, solver=2) - - diff_row = np.max(np.abs(np.dot(self.mat_ful, sol_row) - self.rhs)) - diff_col = np.max(np.abs(np.dot(self.mat_ful, sol_col) - self.rhs)) - diff_ful = np.max(np.abs(np.dot(self.mat_ful, sol_ful) - self.rhs)) - - diff_row_col = np.max(np.abs(self.mat_ful - pp.create_full(self.mat_col))) - self.assertAlmostEqual(diff_row * 1e-5, 0.0) - self.assertAlmostEqual(diff_col * 1e-5, 0.0) - self.assertAlmostEqual(diff_ful * 1e-5, 0.0) - self.assertAlmostEqual(diff_row_col * 1e5, 0.0) - - def test_error(self): - self.err_mat = np.array( - [[3, 2, 1, 0], [-3, -2, 7, 1], [3, 2, -1, 5], [0, 1, 2, 3]] - ) - self.err_rhs = np.array([6, 3, 9, 6]) - sol_2 = pp.solve(self.err_mat, self.err_rhs, is_flat=False, solver=2) - diff_2 = np.max(np.abs(np.dot(self.err_mat, sol_2) - self.err_rhs)) - self.assertAlmostEqual(diff_2, 0.0) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_solvers_external.py b/tests/test_solvers_external.py new file mode 100644 index 0000000..0f407aa --- /dev/null +++ b/tests/test_solvers_external.py @@ -0,0 +1,126 @@ +""" +Test suite for testing the external solvers that can be called via pentapy. The tests +are not exhaustive and only check whether the solvers can be called and return a +solution. + +""" + +# === Imports === + +from typing import Literal + +import numpy as np +import pytest +import util_funcs as uf + +import pentapy as pp + +# === Constants === + +SEED = 19_031_977 +N_ROWS = [ + 3, + 4, + 5, + 10, + 11, + 25, + 26, + 50, + 51, +] +REF_WARNING_CONTENT = "singular" +SOLVER_ALIASES_LAPACK = [3, "3", "LaPaCk"] +SOLVER_ALIASES_SPSOLVE = [4, "4", "SpSoLvE"] + +# === Tests === + + +@pytest.mark.parametrize("induce_error", [False, True]) +@pytest.mark.parametrize("solver_alias", SOLVER_ALIASES_LAPACK + SOLVER_ALIASES_SPSOLVE) +@pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"]) +@pytest.mark.parametrize("n_rhs", [None, 1, 10]) +@pytest.mark.parametrize("n_rows", N_ROWS) +def test_external_solvers( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[1, "1", "PTRANS-I"], + induce_error: bool, +) -> None: + """ + Tests the external bindings for solving pentadiagonal systems starting from + different input layouts, number of right-hand sides, number of rows, and when an + error is induced by a zero matrix. + It has to be ensured that the edge case of ``n_rows = 3`` is also covered. + + """ + + # first, a random pentadiagonal matrix is generated + mat_full = np.zeros(shape=(n_rows, n_rows)) + if not induce_error: + mat_full[::, ::] = uf.gen_conditioned_rand_penta_matrix_dense( + n_rows=n_rows, + seed=SEED, + ill_conditioned=False, + ) + + # the right-hand side is generated + np.random.seed(SEED) + if n_rhs is not None: + rhs = np.random.rand(n_rows, n_rhs) + result_shape = (n_rows, n_rhs) + else: + rhs = np.random.rand(n_rows) + result_shape = (n_rows,) + + # the matrix is converted to the desired layout + if input_layout == "full": + mat = mat_full + kwargs = dict(is_flat=False) + + elif input_layout == "banded_row_wise": + mat = pp.create_banded(mat_full, col_wise=False) + kwargs = dict( + is_flat=True, + index_row_wise=True, + ) + + elif input_layout == "banded_col_wise": + mat = pp.create_banded(mat_full, col_wise=True) + kwargs = dict( + is_flat=True, + index_row_wise=False, + ) + + else: + raise ValueError(f"Invalid input layout: {input_layout}") + + # the solution is computed + # Case 1: in case of an error, a warning has to be issued and the result has to + # be NaN + if induce_error: + with pytest.warns(UserWarning, match=REF_WARNING_CONTENT): + mat_ref_copy = mat.copy() + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + **kwargs, + ) + assert sol.shape == result_shape + assert np.isnan(sol).all() + assert np.array_equal(mat, mat_ref_copy) + + return + + # Case 2: in case of no error, the solution can be computed without any issues + mat_ref_copy = mat.copy() + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + **kwargs, + ) + assert sol.shape == result_shape + assert np.array_equal(mat, mat_ref_copy) diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py new file mode 100644 index 0000000..b6a3b4e --- /dev/null +++ b/tests/test_solvers_internal_parallel.py @@ -0,0 +1,128 @@ +""" +Test suite for testing the pentadiagonal solver based on either Algorithm PTRANS-I or +PTRANS-II. + +It tests them in PARALLEL mode. + +""" + +# === Imports === + +from copy import deepcopy +from typing import Literal, Optional + +import pytest +import templates + +# === Tests === + +# the following series of decorators parametrize the tests for the pentadiagonal solver +# based on either Algorithm PTRANS-I or PTRANS-II in parallel mode +param_dict = deepcopy(templates.PARAM_DICT) +param_dict["from_order"] = ["C"] +param_dict["num_threads"] = [-1] + +# --- Extended solve test --- + + +def test_pentapy_solvers_parallel( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + induce_error: bool, + from_order: Literal["C", "F"], + num_threads: int, +) -> None: + + templates.pentapy_solvers_extended_template( + n_rows=n_rows, + n_rhs=n_rhs, + input_layout=input_layout, + solver_alias=solver_alias, + induce_error=induce_error, + from_order=from_order, + num_threads=num_threads, + ) + + +for key, value in param_dict.items(): + test_pentapy_solvers_parallel = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_parallel + ) + + +# --- Different number of threads test --- + + +@pytest.mark.parametrize("num_threads", [0, 1, -1, -2, None]) +def test_pentapy_solvers_parallel_different_num_threads( + num_threads: Optional[int], +) -> None: + """ + Tests that the parallel solvers run properly with different numbers of threads. + + """ + + kwargs = dict( + n_rows=10, + n_rhs=1, + input_layout="full", + solver_alias=1, + induce_error=False, + from_order="C", + num_threads=num_threads, + ) + + # NOTE: if there is no crash, the test is successful + templates.pentapy_solvers_extended_template(**kwargs) # type: ignore + + +# --- Shape mismatch test --- + + +def test_pentapy_solvers_shape_mismatch_parallel( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + from_order: Literal["C", "F"], + num_threads: int, +) -> None: + + templates.pentapy_solvers_shape_mismatch_template( + n_rows=n_rows, + n_rhs=n_rhs, + input_layout=input_layout, + solver_alias=solver_alias, + from_order=from_order, + num_threads=num_threads, + ) + + +params_dict_without_induce_error = deepcopy(templates.PARAM_DICT) +params_dict_without_induce_error["num_threads"] = [-1] +params_dict_without_induce_error.pop("induce_error") + +for key, value in params_dict_without_induce_error.items(): + test_pentapy_solvers_shape_mismatch_parallel = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_shape_mismatch_parallel + ) diff --git a/tests/test_solvers_internal_serial.py b/tests/test_solvers_internal_serial.py new file mode 100644 index 0000000..0957a65 --- /dev/null +++ b/tests/test_solvers_internal_serial.py @@ -0,0 +1,100 @@ +""" +Test suite for testing the pentadiagonal solver based on either Algorithm PTRANS-I or +PTRANS-II. + +It tests them in SERIAL mode only. + +""" + +# === Imports === + +from copy import deepcopy +from typing import Literal + +import pytest +import templates + +# === Tests === + +# the following series of decorators parametrize the tests for the pentadiagonal solver +# based on either Algorithm PTRANS-I or PTRANS-II in serial mode + + +# --- Extended solve test --- + + +def test_pentapy_solvers_extended_serial( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + induce_error: bool, + from_order: Literal["C", "F"], + num_threads: int, +) -> None: + + templates.pentapy_solvers_extended_template( + n_rows=n_rows, + n_rhs=n_rhs, + input_layout=input_layout, + solver_alias=solver_alias, + induce_error=induce_error, + from_order=from_order, + num_threads=num_threads, + ) + + +for key, value in templates.PARAM_DICT.items(): + test_pentapy_solvers_extended_serial = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_extended_serial + ) + + +# --- Shape mismatch test --- + + +def test_pentapy_solvers_shape_mismatch_serial( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + from_order: Literal["C", "F"], + num_threads: int, +) -> None: + + templates.pentapy_solvers_shape_mismatch_template( + n_rows=n_rows, + n_rhs=n_rhs, + input_layout=input_layout, + solver_alias=solver_alias, + from_order=from_order, + num_threads=num_threads, + ) + + +params_dict_without_induce_error = deepcopy(templates.PARAM_DICT) +params_dict_without_induce_error.pop("induce_error") + + +for key, value in params_dict_without_induce_error.items(): + test_pentapy_solvers_shape_mismatch_serial = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_shape_mismatch_serial + ) diff --git a/tests/test_tools.py b/tests/test_tools.py new file mode 100644 index 0000000..cabac61 --- /dev/null +++ b/tests/test_tools.py @@ -0,0 +1,210 @@ +""" +This test suite implements the test for the ``tools`` module of the ``pentapy`` package. + +""" + +# === Imports === + +import warnings +from typing import Optional, Tuple, Type + +import numpy as np +import pytest +import util_funcs as uf + +import pentapy as pp +from pentapy.tools import _check_penta + +warnings.simplefilter("always") + +# === Constants === + +SEED = 19_031_977 +N_ROWS = [ + 3, + 4, + 5, + 10, + 11, + 25, + 26, + 50, + 51, + 100, + 101, + 250, + 251, + 500, + 501, + 1_000, + 1_001, + 2500, + 2501, + 5_000, + 5_001, + 10_000, + 10_001, +] + +# === Tests === + + +@pytest.mark.parametrize("offset", [0, 1, 2, -1, -2]) +@pytest.mark.parametrize("n_rows", N_ROWS) +def test_diag_indices(n_rows: int, offset: int) -> None: + """ + Tests the generation of the diagonal indices via the function + ``pentapy.diag_indices``. + + """ + + # the diagonal indices are obtained with NumPy and pentapy + row_idxs_ref, col_idxs_ref = uf.get_diag_indices(n=n_rows, offset=offset) + row_idxs, col_idxs = pp.diag_indices(n=n_rows, offset=offset) + + # the diagonal indices are compared + assert np.array_equal(row_idxs_ref, row_idxs) + assert np.array_equal(col_idxs_ref, col_idxs) + + +@pytest.mark.parametrize("copy", [True, False]) +@pytest.mark.parametrize("with_shift", [True, False]) +@pytest.mark.parametrize("col_wise", [True, False]) +@pytest.mark.parametrize("n_rows", N_ROWS) +def test_penta_generators( + n_rows: int, + col_wise: bool, + with_shift: bool, + copy: bool, +) -> None: + """ + Tests the generation of pentadiagonal matrices where the matrix. + + """ + + # a reference matrix is initialised + mat_full_ref = uf.gen_rand_penta_matrix_dense_int( + n_rows=n_rows, + seed=SEED, + with_pentapy_indices=False, + ) + + # then, it is turned into a banded matrix ... + mat_banded = pp.create_banded(mat_full_ref, col_wise=col_wise) + + # ... which is maybe shifted + # Case 1: copied shift + if with_shift and copy: + mat_banded = pp.shift_banded(mat_banded, col_to_row=col_wise, copy=True) + col_wise = not col_wise + + # Case 2: in-place shift + if with_shift and not copy: + mat_banded = pp.shift_banded(mat_banded, col_to_row=col_wise, copy=False) + col_wise = not col_wise + + # ... from which a full matrix is created again + mat_full = pp.create_full(mat_banded, col_wise=col_wise) + + # the matrices are compared + assert np.array_equal(mat_full_ref, mat_full) + + +@pytest.mark.parametrize( + "shape, exception", + [ + ((5, 5), None), # Valid 2D Array with 5 rows and 5 rows + ((5, 2), ValueError), # 2D Array with 5 rows but only 2 columns + ((2, 5), ValueError), # 2D Array with 2 rows but 5 columns + ((5,), ValueError), # 1D Array + ], +) +def test_create_banded_raises( + shape: Tuple[int, ...], + exception: Optional[Type[Exception]], +) -> None: + """ + Test if the function ``pentapy.create_banded`` raises the expected exceptions. + + """ + + # the test matrix is initialised + np.random.seed(SEED) + mat = np.random.rand(*shape) + + # Case 1: no exception should be raised + if exception is None: + pp.create_banded(mat) + return + + # Case 2: an exception should be raised + with pytest.raises(exception): + pp.create_banded(mat) + + +@pytest.mark.parametrize( + "shape, exception", + [ + ((5, 5), None), # Valid 2D Array with 5 bands and 5 columns + ((5, 10), None), # Valid 2D Array with 5 bands and 10 columns + ((5, 3), None), # 2D Array with 5 bands and the minimum number of columns + ((6, 20), ValueError), # 2D Array does not have 5 bands + ((4, 30), ValueError), # 2D Array does not have 5 bands + ((5, 1), ValueError), # 2D Array with 5 bands but too little columns + ((5, 2), ValueError), # 2D Array with 5 bands but too little columns + ((5,), ValueError), # 1D Array + ], +) +def test_create_full_raises( + shape: Tuple[int, ...], + exception: Optional[Type[Exception]], +) -> None: + """ + Test if the function ``pentapy.create_full`` raises the expected exceptions. + + """ + + # the test matrix is initialised + np.random.seed(SEED) + mat = np.random.rand(*shape) + + # Case 1: no exception should be raised + if exception is None: + pp.create_full(mat) + return + + # Case 2: an exception should be raised + with pytest.raises(exception): + pp.create_full(mat) + + +@pytest.mark.parametrize( + "shape, exception", + [ + ((5, 3), None), # Valid 2D Array with 5 bands and 3 rows + ((5, 2), ValueError), # 2D Array with 5 bands but less than 3 rows + ((4, 3), ValueError), # 2D Array with less than 5 bands + ((5,), ValueError), # 1D Array + ], +) +def test_check_penta( + shape: Tuple[int, ...], + exception: Optional[Type[Exception]], +) -> None: + """ + Test if the function ``pentapy.tools._check_penta`` raises the expected exceptions. + + """ + + # the test matrix is initialised + np.random.seed(SEED) + mat = np.random.rand(*shape) + + # Case 1: no exception should be raised + if exception is None: + _check_penta(mat) + return + + # Case 2: an exception should be raised + with pytest.raises(exception): + _check_penta(mat) diff --git a/tests/util_funcs.py b/tests/util_funcs.py new file mode 100644 index 0000000..0ea1582 --- /dev/null +++ b/tests/util_funcs.py @@ -0,0 +1,447 @@ +""" +This test suite implements the utility functions for testing the ``pentapy`` package. + +""" + +# === Imports === + +from functools import partial +from typing import Tuple + +import numpy as np +from scipy import linalg as spla +from scipy import sparse as sprs + +import pentapy as pp + +# === Constants === + +_MIN_DIAG_VAL = 1e-3 + + +# === Utility Functions === + + +def get_diag_indices( + n: int, + offset: int, +) -> Tuple[np.ndarray, np.ndarray]: + """ + Computes the row and column indices of the diagonal of a matrix ``mat``. + + This answer is based on the Stack Overflow answer that can be found at: + https://stackoverflow.com/a/18081653/14814813 + + Doctests + -------- + >>> # Setting up a test matrix + >>> n_rows = 5 + >>> mat = np.arange(start=0, stop=n_rows * n_rows).reshape(n_rows, n_rows) + + >>> # Getting the main diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=0) + >>> row_idxs + array([0, 1, 2, 3, 4]) + >>> col_idxs + array([0, 1, 2, 3, 4]) + >>> mat[row_idxs, col_idxs] + array([ 0, 6, 12, 18, 24]) + + >>> # Getting the first upper diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=1) + >>> row_idxs + array([0, 1, 2, 3]) + >>> col_idxs + array([1, 2, 3, 4]) + >>> mat[row_idxs, col_idxs] + array([ 1, 7, 13, 19]) + + >>> # Getting the second upper diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=2) + >>> row_idxs + array([0, 1, 2]) + >>> col_idxs + array([2, 3, 4]) + >>> mat[row_idxs, col_idxs] + array([ 2, 8, 14]) + + >>> # Getting the first lower diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=-1) + >>> row_idxs + array([1, 2, 3, 4]) + >>> col_idxs + array([0, 1, 2, 3]) + >>> mat[row_idxs, col_idxs] + array([ 5, 11, 17, 23]) + + >>> # Getting the second lower diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=-2) + >>> row_idxs + array([2, 3, 4]) + >>> col_idxs + array([0, 1, 2]) + >>> mat[row_idxs, col_idxs] + array([10, 16, 22]) + + """ + + row_idxs, col_idxs = np.diag_indices(n=n, ndim=2) + if offset < 0: + row_idx_from = -offset + row_idx_to = None + col_idx_from = 0 + col_idx_to = offset + elif offset > 0: + row_idx_from = 0 + row_idx_to = -offset + col_idx_from = offset + col_idx_to = None + else: + row_idx_from = None + row_idx_to = None + col_idx_from = None + col_idx_to = None + + return ( + row_idxs[row_idx_from:row_idx_to], + col_idxs[col_idx_from:col_idx_to], + ) + + +def gen_rand_penta_matrix_dense_int( + n_rows: int, + seed: int, + with_pentapy_indices: bool, +) -> np.ndarray: + """ + Generates a random dense pentadiagonal matrix with shape ``(n_rows, n_rows)`` and + data type ``int64``. + + Doctests + -------- + >>> # Generating a random pentadiagonal matrix with NumPy indices + >>> n_rows = 5 + >>> seed = 19_031_977 + >>> with_pentapy_indices = False + + >>> mat_no_pentapy = gen_rand_penta_matrix_dense_int( + ... n_rows=n_rows, + ... seed=seed, + ... with_pentapy_indices=with_pentapy_indices + ... ) + >>> mat_no_pentapy + array([[117, 499, 43, 0, 0], + [378, 149, 857, 353, 0], + [285, 769, 767, 229, 484], + [ 0, 717, 214, 243, 877], + [ 0, 0, 410, 611, 79]], dtype=int64) + + >>> # Generating a random pentadiagonal matrix with pentapy indices + >>> mat_with_pentapy = gen_rand_penta_matrix_dense_int( + ... n_rows=n_rows, + ... seed=seed, + ... with_pentapy_indices=True + ... ) + >>> mat_with_pentapy + array([[117, 499, 43, 0, 0], + [378, 149, 857, 353, 0], + [285, 769, 767, 229, 484], + [ 0, 717, 214, 243, 877], + [ 0, 0, 410, 611, 79]], dtype=int64) + + >>> # Checking if the two matrices are equal + >>> np.array_equal(mat_no_pentapy, mat_with_pentapy) + True + + """ + + # first, a matrix of zeros is initialised ... + mat = np.zeros((n_rows, n_rows), dtype=np.int64) + # ... together with a partially specified random vector generator + # NOTE: this ensures consistent random numbers for both cases + gen_rand_int = partial(np.random.randint, low=1, high=1_000) + + # then, the diagonal index function is obtained + diag_idx_func = get_diag_indices + if with_pentapy_indices: + diag_idx_func = pp.diag_indices + + # then, the diagonals are filled with random integers + np.random.seed(seed=seed) + for offset in range(-2, 3): + row_idxs, col_idxs = diag_idx_func(n=n_rows, offset=offset) + mat[row_idxs, col_idxs] = gen_rand_int(size=n_rows - abs(offset)) + + return mat + + +def gen_conditioned_rand_penta_matrix_dense( + n_rows: int, + seed: int, + ill_conditioned: bool, +) -> np.ndarray: + """ + Generates a well- or ill-conditioned random banded pentadiagonal matrix with shape + ``(n_rows, n_rows)``. + + This is achieved as follows: + - a fake LDU decomposition is generated where ``L`` and ``U`` are unit lower and + upper triangular matrices, respectively, and ``D`` is a diagonal matrix + - the matrix is then reconstructed by multiplying the three matrices and converting + the result to a banded matrix + + If ``D`` does not have any zeros or values of small magnitude compared to the + largest value, the matrix should be well-conditioned. + Otherwise, it is ill-conditioned. + + Doctests + -------- + >>> # Imports + >>> from math import log10 + + >>> # 1) Generating a super small well-conditioned random pentadiagonal matrix + >>> n_rows = 3 + >>> seed = 19_031_977 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... ) + >>> mat + array([[ 0.92453713, 0.28308514, -0.09972199], + [-0.09784268, 0.2270634 , -0.1509019 ], + [-0.23431267, 0.00468463, 0.22991003]]) + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True + >>> spla.bandwidth(mat) + (2, 2) + >>> # its condition number is computed and values below 1e10 can be considered good + >>> round(np.linalg.cond(mat), 2) + 4.98 + + >>> # 2) Generating a super small ill-conditioned random pentadiagonal matrix + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=True, + ... ) + >>> mat + array([[ 0.92453713, 0.28308514, -0.09972199], + [-0.09784268, 0.2270634 , -0.1509019 ], + [-0.23431267, 0.00468463, -0.02273771]]) + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True + >>> spla.bandwidth(mat) + (2, 2) + >>> # its condition number is computed and its value should be close to the + >>> # reciprocal floating point precision, i.e., ~1e16 + >>> round(log10(np.linalg.cond(mat)), 2) + 17.17 + + >>> # 3) Generating a small well-conditioned random pentadiagonal matrix + >>> n_rows = 7 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... ) + >>> np.round(mat, 2) + array([[ 0.92, -0.72, 0.73, 0. , 0. , 0. , 0. ], + [ 0.83, -0.02, 1.08, 0.41, 0. , 0. , 0. ], + [-0.58, 0.13, -0.13, -0.37, 0.18, 0. , 0. ], + [ 0. , -0.07, -0.58, 0.46, -0.31, 0.28, 0. ], + [ 0. , 0. , 0.43, 0.13, 0.39, -0.1 , -0.15], + [ 0. , 0. , 0. , 0.06, -0.14, 0.4 , 0.28], + [ 0. , 0. , 0. , 0. , -0.14, 0.36, 0.53]]) + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True + >>> spla.bandwidth(mat) + (2, 2) + >>> # its condition number is computed and values below 1e10 can be considered good + >>> round(np.linalg.cond(mat), 2) + 42.48 + + >>> # 4) Generating a small ill-conditioned random pentadiagonal matrix + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=True, + ... ) + >>> np.round(mat, 2) + array([[ 0.92, -0.72, 0.73, 0. , 0. , 0. , 0. ], + [ 0.83, -0.02, 1.08, 0.41, 0. , 0. , 0. ], + [-0.58, 0.13, -0.13, -0.37, 0.18, 0. , 0. ], + [ 0. , -0.07, -0.58, 0.46, -0.31, 0.28, 0. ], + [ 0. , 0. , 0.43, 0.13, 0.39, -0.1 , -0.15], + [ 0. , 0. , 0. , 0.06, -0.14, 0.4 , 0.28], + [ 0. , 0. , 0. , 0. , -0.14, 0.36, 0.28]]) + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True + >>> spla.bandwidth(mat) + (2, 2) + >>> # its condition number is computed and its value should be close to the + >>> # reciprocal floating point precision, i.e., ~1e16 + >>> round(log10(np.linalg.cond(mat)), 2) + 17.04 + + >>> # 5) Generating a large well-conditioned random pentadiagonal matrix + >>> n_rows = 1_000 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... ) + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True + >>> spla.bandwidth(mat) + (2, 2) + >>> # its condition number is computed and values below 1e10 can be considered good + >>> round(np.linalg.cond(mat), 2) + 9571.0 + + >>> # 6) Generating a large ill-conditioned random pentadiagonal matrix + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=True, + ... ) + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True + >>> spla.bandwidth(mat) + (2, 2) + >>> # its condition number is computed and its value should be close to the + >>> # reciprocal floating point precision, i.e., ~1e16 + >>> # NOTE: the next number will be so big that it will be different on each OS + >>> # so it will only be checked if it is greater than 1e16 + >>> round(log10(np.linalg.cond(mat)), 2) >= 16 + True + + """ + + # first, the fake diagonal matrix is generated whose entries are strictly + # positive and sorted in descending order + np.random.seed(seed=seed) + d_diag = np.flip(np.sort(np.random.rand(n_rows))) + + # the conditioning is achieved by manipulating the smallest diagonal entry + # Case 1: well-conditioned matrix + if not ill_conditioned: + # here, the smallest diagonal entry is set to a value that is enforced to have + # a minimum magnitude + d_diag = np.maximum(d_diag, _MIN_DIAG_VAL) + + # Case 2: ill-conditioned matrix + else: + # here, the smallest diagonal entry is set to zero + d_diag[n_rows - 1] = 0.0 + + # ... followed by a unit lower triangular matrix with 2 sub-diagonals, but here + # the entries may be negative ... + diagonals = [ + 1.0 - 2.0 * np.random.rand(n_rows - 2), + 1.0 - 2.0 * np.random.rand(n_rows - 1), + np.ones(n_rows), + ] + l_mat = sprs.diags( + diagonals=diagonals, + offsets=[-2, -1, 0], # type: ignore + shape=(n_rows, n_rows), + format="csr", + dtype=np.float64, + ) + + # ... and an upper triangular matrix with 2 super-diagonals + diagonals = [ + np.ones(n_rows), + 1.0 - 2.0 * np.random.rand(n_rows - 1), + 1.0 - 2.0 * np.random.rand(n_rows - 2), + ] + u_mat = sprs.diags( + diagonals=diagonals, + offsets=[0, 1, 2], # type: ignore + shape=(n_rows, n_rows), + format="csr", + dtype=np.float64, + ) + + # finally, the matrix is reconstructed by multiplying the three matrices + return (l_mat.multiply(d_diag[np.newaxis, ::]).dot(u_mat)).toarray() + + +def solve_penta_matrix_dense_scipy( + mat: np.ndarray, + rhs: np.ndarray, +) -> np.ndarray: + """ + Solves a pentadiagonal matrix system using SciPy's banded solver. + + Doctests + -------- + >>> # Setting up a small test matrix and right-hand side + >>> n_rows = 5 + >>> seed = 19_031_977 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... ) + >>> rhs = np.random.rand(n_rows, 5) + + >>> # Solving the system using SciPy's banded solver + >>> sol = solve_penta_matrix_dense_scipy(mat=mat, rhs=rhs) + >>> np.round(sol, 2) + array([[-2.16, -0.36, 0.72, 0.23, -0.2 ], + [ 4.07, 1.3 , 0.81, 1.31, 0.48], + [ 4.05, 0.33, 2.19, 1.22, 0.58], + [-1.9 , -0.79, 1.02, -0.39, 1.02], + [ 6.31, 1.81, 1.29, 1.41, 0.37]]) + + >>> # the solution is checked by verifying that the residual is close to zero + >>> np.max(np.abs(mat @ sol - rhs)) <= np.finfo(np.float64).eps * n_rows + True + + >>> # Setting up a large test matrix and right-hand side + >>> n_rows = 1_000 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... ) + >>> rhs = np.random.rand(n_rows, 5) + + >>> # Solving the system using SciPy's banded solver + >>> sol = solve_penta_matrix_dense_scipy(mat=mat, rhs=rhs) + >>> # the solution is checked by verifying that the residual is close to zero + >>> np.max(np.abs(mat @ sol - rhs)) <= np.finfo(np.float64).eps * n_rows + True + + """ + + # first, the matrix is converted to LAPACK banded storage format + mat_banded = pp.create_banded(mat=mat, col_wise=True) + + # then, the system is solved using SciPy's banded solver + return spla.solve_banded( + l_and_u=(2, 2), + ab=mat_banded, + b=rhs, + ) + + +# === Doctests === + +if __name__ == "__main__": # pragma: no cover + import doctest + + doctest.testmod()