Skip to content

Commit

Permalink
Merge pull request #561 from etmc/quda_work_eigensolver
Browse files Browse the repository at this point in the history
Interface for eigensolver on QUDA
  • Loading branch information
kostrzewa authored Oct 4, 2023
2 parents 6ac0c6e + 3bb6639 commit 49b0dea
Show file tree
Hide file tree
Showing 9 changed files with 287 additions and 11 deletions.
6 changes: 6 additions & 0 deletions default_input_values.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@
#define _default_phmc_pure_phmc 0
#define _default_stilde_max 3.
#define _default_stilde_min 0.01
#define _default_eig_polydeg 128
#define _default_eig_amin 0.001
#define _default_eig_amax 4
#define _default_eig_n_kr 96
#define _default_degree_of_p 48
#define _default_propagator_splitted 1
#define _default_source_splitted 1
Expand Down Expand Up @@ -199,6 +203,8 @@

#define _default_external_inverter 0

#define _default_external_eigsolver 0

#define _default_external_library 0

#define _default_subprocess_flag 0
Expand Down
15 changes: 15 additions & 0 deletions doc/quda.tex
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,18 @@ \subsubsection{QUDA-MG interface}
In other words, if the largest of these smallest eigenvalues is $4\cdot10^{-3}$, for example, then \texttt{MGEigSolverPolyMin} can be set to 0.01.
This ensures that the desired (smallest) part of the spectrum is smaller than \texttt{MGEigSolverPolyMin} and that the entire spectrum is contained in the range up to \texttt{MGEigSolverPolyMax}.
After this, polynomial acceleration can be enabled, which should reduce setup time significantly.

\subsubsection{Using the QUDA eigensolver in the HMC}

When employing the rational approximation, in order to make sure that the eigenvalue bounds are chosen appropriately, it is necessary to measure the maximal and minimal eigenvalues of the operator involved in the given monomial.
For the monomials \texttt{NDRAT, NDRATCOR, NDCLOVERRAT} and \texttt{NDCLOVERRATCOR}, this can be done using QUDA's eigensolver when, in addition to a non-zero setting for \texttt{ComputeEVFreq}, \texttt{UseExternalEigSolver = quda} is set.

The eigensolver further offers the following parameters:
\begin{itemize}
\item{ \texttt{EigSolverPolynomialDegree}: Once appropriate parameters for the polynomial filter have been determined (see \texttt{EigSolverPolyMin} and \texttt{EigSolverPolyMax} below), when \texttt{EigSolverPolynomialDegree} is set to a non-zero value, polynomial acceleration will be used in the measurent of the smallest eigenvalue. (integer, default: \texttt{128}) }
\item{ \texttt{EigSolverPolyMin}: Smallest eigenvalue to be excluded by the polynomial filter when polynomial acceleration is used. A good value for this should be determined by first running the eigensolver without acceleration (\texttt{EigSolverPolynomialDegree = 0}). \texttt{EigSolverPolyMin} should then be set to about $3\lambda_\mathrm{min}$. Note that this is specified in the operator normalisation, such that $\lambda_\mathrm{min}$ obtained from the measurement should be multiplied by \texttt{StildeMax} to get an appropriate value for \texttt{EigSolverPolyMin}. (positive real number, default: \texttt{0.001})}
\item{ \texttt{EigSolverPolyMax}: Largest eigenvalue to be excluded by the polynomial filter when polynomial acceleration is used. This should be set to a value in excess of the measured largest eigenvalue, $1.5\lambda_\mathrm{max}$, say. Note that this is specified in the operator normalisation such that the measured $\lambda_\mathrm{max}$ should be multiplied by \texttt{StildeMax} to obtain an appropriate value for \texttt{EigSolverPolyMax}. (positive real number, defaullt: \texttt{4.0})}
\item{ \texttt{EigSolverKrylovSubspaceSize}: Size of the Krylov space used for the determination of the smallest and largest eigenvalues. The default seems to work well even for large lattices. (integer, default: \texttt{96})}
\end{itemize}


6 changes: 6 additions & 0 deletions misc_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ typedef enum ExternalInverter_s {
QPHIX_INVERTER
} ExternalInverter;

/* enumeration type for the external eigensolver */
typedef enum ExternalEigSolver_s {
NO_EXT_EIGSOLVER = 0,
QUDA_EIGSOLVER
} ExternalEigSolver;

/* enumeration type for the external inverter */
typedef enum ExternalLibrary_s {
NO_EXT_LIB = 0,
Expand Down
6 changes: 6 additions & 0 deletions monomial/monomial.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ int add_monomial(const int type) {
monomial_list[no_monomials].solver_params.external_inverter = _default_external_inverter;
monomial_list[no_monomials].solver_params.sloppy_precision = _default_operator_sloppy_precision_flag;
monomial_list[no_monomials].external_library = _default_external_library;
monomial_list[no_monomials].external_eigsolver = _default_external_eigsolver;
monomial_list[no_monomials].solver_params.refinement_precision = _default_operator_sloppy_precision_flag;
monomial_list[no_monomials].HB_solver_params = monomial_list[no_monomials].solver_params;
monomial_list[no_monomials].even_odd_flag = _default_even_odd_flag;
Expand Down Expand Up @@ -143,6 +144,11 @@ int add_monomial(const int type) {
monomial_list[no_monomials].PrecisionHfinal = _default_g_acc_Hfin;
monomial_list[no_monomials].PrecisionPtilde = _default_g_acc_Ptilde;

monomial_list[no_monomials].eig_polydeg = _default_eig_polydeg;
monomial_list[no_monomials].eig_amin = _default_eig_amin;
monomial_list[no_monomials].eig_amax = _default_eig_amax;
monomial_list[no_monomials].eig_n_kr = _default_eig_n_kr;

monomial_list[no_monomials].rat.order = 12;
monomial_list[no_monomials].rat.range[0] = _default_stilde_min;
monomial_list[no_monomials].rat.range[1] = _default_stilde_max;
Expand Down
3 changes: 3 additions & 0 deletions monomial/monomial.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ typedef struct {
double PrecisionHfinal;
double StildeMin, StildeMax;
double EVMin, EVMax, EVMaxInv;
int eig_polydeg, eig_n_kr;
double eig_amin, eig_amax;
ExternalLibrary external_library;
ExternalEigSolver external_eigsolver;
double * MDPolyCoefs, * PtildeCoefs;
/* rational approximation */
rational_t rat;
Expand Down
76 changes: 65 additions & 11 deletions phmc.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <math.h>
#include <string.h>
#include <time.h>
#include <float.h>

#include "global.h"

Expand All @@ -40,6 +41,10 @@
#include "solver/matrix_mult_typedef_bi.h"
#include "gettime.h"

#ifdef TM_USE_QUDA
# include "quda_interface.h"
#endif

// --> in monomial
double phmc_Cpol; // --> MDPolyLocNormConst
double phmc_cheb_evmin, phmc_cheb_evmax; // --> EVMin, EVMax
Expand Down Expand Up @@ -206,7 +211,9 @@ void init_phmc() {
void phmc_compute_ev(const int trajectory_counter,
const int id,
matrix_mult_bi Qsq) {
double atime, etime, temp=0., temp2=0.;
double atime, etime;
_Complex double eval_min = 0.0;
_Complex double eval_max = 0.0;
int max_iter_ev, no_eigenvalues;
char buf[100];
char * phmcfilename = buf;
Expand All @@ -223,28 +230,75 @@ void phmc_compute_ev(const int trajectory_counter,
}

no_eigenvalues = 1;

temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq);
if(mnl->external_eigsolver == QUDA_EIGSOLVER) {
#ifdef TM_USE_QUDA
eigsolveQuda(&eval_min, no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0,
mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin,
mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag,
1, // we only support even-odd here
mnl->solver_params.refinement_precision,
mnl->solver_params.sloppy_precision,
mnl->solver_params.compression_type, 0);
if( fabs(mnl->EVMax - 1) < 2*DBL_EPSILON ) {
eval_min /= mnl->StildeMax;
}
#else
if(g_proc_id == 0) {
fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n");
#ifdef TM_USE_MPI
MPI_Finalize();
#endif
exit(-2);
}
#endif
}else {
eval_min = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq);
}


no_eigenvalues = 1;
temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq);
if(mnl->external_eigsolver == QUDA_EIGSOLVER) {
#ifdef TM_USE_QUDA
eigsolveQuda(&eval_max, no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1,
mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin,
mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag,
1, // we only support even-odd here
mnl->solver_params.refinement_precision,
mnl->solver_params.sloppy_precision,
mnl->solver_params.compression_type, 0);
if( fabs(mnl->EVMax - 1.) < 2*DBL_EPSILON ) {
eval_max /= mnl->StildeMax;
}
#else
if(g_proc_id == 0) {
fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n");
#ifdef TM_USE_MPI
MPI_Finalize();
#endif
exit(-2);
}
#endif
}else {
eval_max = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq);
}


if((g_proc_id == 0) && (g_debug_level > 1)) {
printf("# %s: lowest eigenvalue end of trajectory %d = %e\n",
mnl->name, trajectory_counter, temp);
mnl->name, trajectory_counter, creal(eval_min));
printf("# %s: maximal eigenvalue end of trajectory %d = %e\n",
mnl->name, trajectory_counter, temp2);
mnl->name, trajectory_counter, creal(eval_max));
}
if(g_proc_id == 0) {
if(temp2 > mnl->EVMax) {
fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s larger than upper bound!\n\n", mnl->name);
if(creal(eval_max) > mnl->EVMax) {
fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s: %.6f is larger than upper bound: %.6f\n\n", mnl->name, creal(eval_max), mnl->EVMax);
}
if(temp < mnl->EVMin) {
fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s smaller than lower bound!\n\n", mnl->name);
if(creal(eval_min) < mnl->EVMin) {
fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s: %.6f is smaller than lower bound: %.6f\n\n", mnl->name, creal(eval_min), mnl->EVMin);
}
countfile = fopen(phmcfilename, "a");
fprintf(countfile, "%.8d %1.5e %1.5e %1.5e %1.5e\n",
trajectory_counter, temp, temp2, mnl->EVMin, mnl->EVMax);
trajectory_counter, creal(eval_min), creal(eval_max), mnl->EVMin, mnl->EVMax);
fclose(countfile);
}
etime = gettime();
Expand Down
151 changes: 151 additions & 0 deletions quda_interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* 2018 Bartosz Kostrzewa, Ferenc Pittler
* 2019, 2020 Bartosz Kostrzewa
* 2021 Bartosz Kostrzewa, Marco Garofalo, Ferenc Pittler, Simone Bacchio
* 2022 Simone Romiti, Bartosz Kostrzewa
* 2023 Aniket Sen, Bartosz Kostrzewa
*
* This file is part of tmLQCD.
*
Expand Down Expand Up @@ -150,6 +152,10 @@ QudaEigParam mg_eig_param[QUDA_MAX_MG_LEVEL];
// input params specific to tmLQCD QUDA interface
tm_QudaParams_t quda_input;


// parameters for the eigensolver
QudaEigParam eig_param;

// pointer to the QUDA gaugefield
double *gauge_quda[4];

Expand Down Expand Up @@ -2594,3 +2600,148 @@ void compute_WFlow_quda(const double eps, const double tmax, const int traj, FI
free(obs_param);
tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA");
}




/********************************************************
Interface function for Eigensolver on Quda
*********************************************************/


void eigsolveQuda(_Complex double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin,
const double precision, const int max_iter, const int polydeg, const double amin,
const double amax, const int n_kr, const int solver_flag, const int rel_prec,
const int even_odd_flag, const SloppyPrecision refinement_precision,
SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag) {

tm_stopwatch_push(&g_timers, __func__, "");


// it returns if quda is already init
_initQuda();

if ( rel_prec )
inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL;
else
inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL;

inv_param.kappa = g_kappa;

// figure out which BC tu use (theta, trivial...)
set_boundary_conditions(&compression, &gauge_param);

set_sloppy_prec(sloppy_precision, refinement_precision, &gauge_param, &inv_param);

// load gauge after setting precision
_loadGaugeQuda(compression);

if ( oneFlavourFlag ) {
_setOneFlavourSolverParam(g_kappa, g_c_sw, g_mu, solver_flag, even_odd_flag, precision, max_iter,
1 /*single_parity_solve */,
1 /*always QpQm*/);
}else {
_setTwoFlavourSolverParam(g_kappa, g_c_sw, g_mubar, g_epsbar, solver_flag, even_odd_flag, precision, max_iter,
1 /*single_parity_solve */,
1 /*always QpQm*/);
}

// create new eig_param
eig_param = newQudaEigParam();

// need our own QudaInvertParam for passing the operator properties
// as we modify the precision below
QudaInvertParam eig_invert_param = newQudaInvertParam();
eig_invert_param = inv_param;
eig_param.invert_param = &eig_invert_param;
eig_param.invert_param->verbosity = QUDA_VERBOSE;
/* AS The following two are set to cuda_prec, otherwise
* it gives an error. Such high precision might not be
* necessary. But have not found a way to consistently set
* the different precisions. */
eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec;
eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.clover_cuda_prec;

// for consistency with tmLQCD's own eigensolver we require a precision of at least
// 1e-14
if(tol < 1.e-14) {
eig_param.tol = 1.e-14;
eig_param.qr_tol = 1.e-14;
}else {
eig_param.tol = tol;
eig_param.qr_tol = tol;
}

if(blkwise == 1) {
eig_param.eig_type = QUDA_EIG_BLK_TR_LANCZOS;
eig_param.block_size = blksize;
}else {
eig_param.eig_type = QUDA_EIG_TR_LANCZOS;
eig_param.block_size = 1;
}

if(eig_param.invert_param->solve_type == QUDA_NORMOP_PC_SOLVE) {
eig_param.use_pc = QUDA_BOOLEAN_TRUE;
eig_param.use_norm_op = QUDA_BOOLEAN_TRUE;
}else if(eig_param.invert_param->solve_type == QUDA_DIRECT_PC_SOLVE) {
eig_param.use_pc = QUDA_BOOLEAN_TRUE;
eig_param.use_norm_op = QUDA_BOOLEAN_FALSE;
}else if(eig_param.invert_param->solve_type == QUDA_NORMOP_SOLVE) {
eig_param.use_pc = QUDA_BOOLEAN_FALSE;
eig_param.use_norm_op = QUDA_BOOLEAN_TRUE;
}else {
eig_param.use_pc = QUDA_BOOLEAN_FALSE;
eig_param.use_norm_op = QUDA_BOOLEAN_FALSE;
}

eig_param.use_poly_acc = (maxmin == 1) || (polydeg == 0) ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
eig_param.poly_deg = polydeg;
eig_param.a_min = amin;
eig_param.a_max = amax;

/* Daggers the operator. Not necessary for
* most cases. */
eig_param.use_dagger = QUDA_BOOLEAN_FALSE;

/* Most likely not necessary. Set TRUE to use
* Eigen routines to eigensolve the upper Hessenberg via QR */
eig_param.use_eigen_qr = QUDA_BOOLEAN_FALSE;

eig_param.compute_svd = QUDA_BOOLEAN_FALSE;

/* Set TRUE to performs the \gamma_5 OP solve by
* post multipling the eignvectors with \gamma_5
* before computing the eigenvalues */
eig_param.compute_gamma5 = QUDA_BOOLEAN_FALSE;


if(maxmin == 1) eig_param.spectrum = QUDA_SPECTRUM_LR_EIG;
else eig_param.spectrum = QUDA_SPECTRUM_SR_EIG;


/* At the moment, the eigenvalues and eigenvectors are neither
* written to or read from disk, but if necessary, can be added
* as a feature in future, by setting the following filenames */
strncpy(eig_param.vec_outfile,"",256);
strncpy(eig_param.vec_infile,"",256);


/* The size of eigenvector search space and
* the number of required converged eigenvectors
* is both set to n_evals */
eig_param.n_conv = n_evals;
eig_param.n_ev = n_evals;
/* The size of the Krylov space is set to 96.
* From my understanding, QUDA automatically scales
* this search space, however more testing on this
* might be necessary */
eig_param.n_kr = n_kr;

eig_param.max_restarts = max_iterations;

eigensolveQuda(NULL, evals, &eig_param);

tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA");
}
7 changes: 7 additions & 0 deletions quda_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,11 @@ int invert_eo_quda_twoflavour_mshift(spinor ** const out_up, spinor ** const out
void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf);
void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile);


void eigsolveQuda(_Complex double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin,
const double precision, const int max_iter, const int polydeg, const double amin,
const double amax, const int n_kr, const int solver_flag, const int rel_prec,
const int even_odd_flag, const SloppyPrecision refinement_precision,
SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag);

#endif /* QUDA_INTERFACE_H_ */
Loading

0 comments on commit 49b0dea

Please sign in to comment.