Skip to content

Commit

Permalink
Merge pull request #549 from etmc/quda_work_clover_force
Browse files Browse the repository at this point in the history
WIP Quda work clover force
  • Loading branch information
kostrzewa authored Jan 27, 2024
2 parents 863ed0f + 6d2f3fe commit 28295ac
Show file tree
Hide file tree
Showing 16 changed files with 439 additions and 127 deletions.
2 changes: 1 addition & 1 deletion Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ MODULES = read_input gamma measure_gauge_action start \
little_D block operator \
spinor_fft X_psi P_M_eta \
jacobi fatal_error invert_clover_eo gettime \
tm_debug_printf \
tm_debug_printf compare_derivative \
@SPI_FILES@ @QUDA_INTERFACE@ @DDalphaAMG_INTERFACE@

CXXMODULES = @QPHIX_INTERFACE@
Expand Down
79 changes: 79 additions & 0 deletions compare_derivative.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/***********************************************************************
*
* Copyright (C) 2024 Bartosz Kostrzewa
*
* This file is part of tmLQCD.
*
* tmLQCD is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* tmLQCD is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with tmLQCD. If not, see <http://www.gnu.org/licenses/>.
***********************************************************************/

#ifdef HAVE_CONFIG_H
# include<tmlqcd_config.h>
#endif
#ifdef TM_USE_OMP
# include <omp.h>
#endif
#include <math.h>
#include <stdio.h>
#include "global.h"
#include "monomial/monomial.h"

/**
 * Compare a derivative field computed by an external library with the one
 * computed by tmLQCD, component by component, and report the deviations.
 *
 * The loops run over ix in [0, VOLUME), mu in [0, 4) and 8 components per
 * (ix, mu) entry. A component counts as deviating when
 *   - |ext - native| is larger than threshold, or
 *   - the difference is NaN (i.e. the external OR the native value is NaN,
 *     or both are infinities of the same sign), or
 *   - the external value is infinite.
 * Every deviating component is printed to stdout by the process that owns it.
 *
 * If at least one deviation was found on this process, a summary line with
 * mnl->c0, mnl->c1 and mnl->beta is printed and, when
 * g_strict_residual_check is set, fatal_error() is called.
 *
 * Finally the per-process deviation counts are reduced with MPI_MAX to rank 0
 * (when TM_USE_MPI is defined) and the process with g_proc_id == 0 prints the
 * maximum. With TM_USE_MPI this involves MPI_Barrier and MPI_Reduce on
 * MPI_COMM_WORLD, so the function has to be called by all processes.
 *
 * @param mnl        monomial whose c0, c1 and beta members are printed in
 *                   the per-process summary; not modified here
 * @param ext_lib    derivative computed by the external library,
 *                   indexed as ext_lib[ix][mu]
 * @param native     derivative computed by tmLQCD, indexed as native[ix][mu]
 * @param threshold  largest absolute difference that is still accepted
 * @param name       label used in the messages and passed to fatal_error()
 */
void compare_derivative(monomial *mnl, su3adj **ext_lib, su3adj **native, 
                        const double threshold, const char * name){
  int n_diff = 0;

  for(int ix = 0; ix < VOLUME; ix++){
    for(int mu = 0; mu < 4; mu++){
      /* The 8 components are accessed through a pointer to the first member.
       * NOTE(review): this assumes su3adj consists of 8 contiguous doubles
       * starting at d1 - confirm against su3adj.h */
      const double *ext = &(ext_lib[ix][mu].d1);
      const double *nat = &(native[ix][mu].d1);
      for(int j = 0; j < 8; ++j){
        const double diff = ext[j] - nat[j];
        /* isnan(diff) is required because every ordered comparison with NaN
         * is false: without it a NaN in the native field would be counted
         * as agreement */
        if( fabs(diff) > threshold || isnan(diff) || isinf(ext[j]) ){
          n_diff++;
          printf("derivative at (t,x,y,z,mu,j) %d,%d,%d,%d,%d,%d,"
                 " ext: %-14e, native: %-14e ratio: %-14g diff %-14g on proc_id %d\n", 
                 g_coord[ix][0], g_coord[ix][1], g_coord[ix][2], g_coord[ix][3], mu, j,
                 ext[j], nat[j], ext[j]/nat[j], diff, g_proc_id);
        }
      }
    }
  }
  if(n_diff > 0){
    printf("%s: the deviation between tmLQCD and the external library "
           "exceeds the threshold %.1e in %d case(s) for parameters: c0=%e c1=%e g_beta=%e on proc_id: %d\n",
           name,
           threshold,
           n_diff,
           mnl->c0,
           mnl->c1,
           mnl->beta,
           g_proc_id);

    if(g_strict_residual_check) fatal_error("Difference between external library and tmLQCD-native function!",
                                            name);
  }

  /* largest per-process deviation count, only meaningful on rank 0 */
  int red_n_diff = 0;
#ifdef TM_USE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Reduce(&n_diff, &red_n_diff, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
#else
  red_n_diff = n_diff;
#endif
  if(g_proc_id == 0){
    printf("The maximum number of deviations in %s exceeding the threshold %.1e was %d\n", 
           name, threshold, red_n_diff);
  }
}

29 changes: 29 additions & 0 deletions compare_derivative.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/***********************************************************************
*
* Copyright (C) 2024 Bartosz Kostrzewa
*
* This file is part of tmLQCD.
*
* tmLQCD is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* tmLQCD is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with tmLQCD. If not, see <http://www.gnu.org/licenses/>.
***********************************************************************/

#ifndef COMPARE_DERIVATIVE_H
#define COMPARE_DERIVATIVE_H

#include "monomial/monomial.h"
#include "su3adj.h"

/* Compares, component by component, the derivative computed by an external
 * library (ext_lib) with the one computed by tmLQCD (native), both indexed
 * as [site][direction]. Components deviating by more than threshold are
 * printed to stdout; name labels the messages and the c0, c1 and beta
 * members of mnl are printed alongside. When g_strict_residual_check is set
 * and deviations were found, fatal_error() is called. */
void compare_derivative(monomial *mnl, su3adj **ext_lib, su3adj **native, const double threshold, const char * name);

#endif
10 changes: 10 additions & 0 deletions configure.in
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,16 @@ if test $enable_quda_experimental = yes; then
else
AC_MSG_RESULT(no)
fi
# Fermionic forces computed by QUDA: enabled by default, TM_QUDA_FERMIONIC_FORCES
# is set to 1 unless --enable-quda_fermionic_forces=no is passed to configure
AC_MSG_CHECKING(whether the QUDA force is enabled)
AC_ARG_ENABLE(quda_fermionic_forces,
AS_HELP_STRING([--enable-quda_fermionic_forces], [enable support for fermionic forces using QUDA [default=yes]]),
enable_quda_fermionic_forces=$enableval, enable_quda_fermionic_forces=yes)
if test $enable_quda_fermionic_forces = no; then
AC_MSG_RESULT(no)
else
AC_MSG_RESULT(yes)
AC_DEFINE(TM_QUDA_FERMIONIC_FORCES,1, fermionic forces with QUDA are enabled)
fi

# QPhiX library for Intel Xeon and Xeon Phis
AC_MSG_CHECKING(whether we want to use QPhiX)
Expand Down
23 changes: 15 additions & 8 deletions doc/input.tex
Original file line number Diff line number Diff line change
Expand Up @@ -425,17 +425,10 @@ \subsection{Input parameter for main program}
Each of them has different options
:
\begin{itemize}
\item {\ttfamily DET, CLOVERDET}:
\begin{itemize}
\item {\ttfamily 2KappaMu}
\end{itemize}
\item {\ttfamily CLOVERDET}:
\begin{itemize}
\item {\ttfamily csw}
\end{itemize}
\item {\ttfamily DET, CLOVERDET}:
\begin{itemize}
\item {\ttfamily Kappa}
\item {\ttfamily 2KappaMu}
\item {\ttfamily Timescale}: the timescale on which to integrate
this monomial. Counting starts from zero up to the total number of
timescales minus 1.
Expand All @@ -454,8 +447,17 @@ \subsection{Input parameter for main program}
\item {\ttfamily HB\_Solver}: the solver to be used in the heatbath step, see section \ref{sec:hb.solver} for details.
\item {\ttfamily Name}: a name to be assigned to the monomial. The
default is {\ttfamily DET}
\item {\ttfamily UseExternalInverter}: the external inverter to which the
solves for this monomial are offloaded, either {\ttfamily no} (default value) or {\ttfamily quda}.
\end{itemize}
%
\item {\ttfamily CLOVERDET}:
\begin{itemize}
\item {\ttfamily csw}
\item {\ttfamily UseExternalLibrary}: the external library which performs the
force calculation for this monomial, either {\ttfamily no} (default value) or {\ttfamily quda}.
\end{itemize}
%
\item {\ttfamily DETRATIO}: the same as for {\ttfamily DET}, but in
addition:
\begin{itemize}
Expand All @@ -464,10 +466,13 @@ \subsection{Input parameter for main program}
\item {\ttfamily Name}:
a name to be assigned to the monomial.
The default is {\ttfamily DETRATIO}
\item {\ttfamily UseExternalInverter}: the external inverter to which the
solves for this monomial are offloaded, either {\ttfamily no} (default value) or {\ttfamily quda}.
\end{itemize}
%
\item {\ttfamily CLOVERDETRATIO}:
see {\ttfamily CLOVERDET} and {\ttfamily DETRATIO}.

%
\item {\ttfamily GAUGE}:
\begin{itemize}
Expand All @@ -490,6 +495,8 @@ \subsection{Input parameter for main program}
\item {\ttfamily RectangleCoefficient}: the value of the parameter
$c_1$. The coefficient $c_0$ is computed from $c_0 = 1-8c_1$. Is
effective only for {\ttfamily type = user}.
\item {\ttfamily UseExternalLibrary}: the external library which performs the
force calculation for this monomial, either {\ttfamily no} (default value) or {\ttfamily quda}.
\end{itemize}
There is maximally one instance allowed of this type.

Expand Down
20 changes: 19 additions & 1 deletion doc/quda.tex
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ \subsubsection{Design goals of the interface}
\begin{enumerate}
\item \emph{Safety.} Naturally, highest priority is given to the correctness of the output of the interface.
This is trivially achieved by always checking the final residual on the CPU with the default tmLQCD routines.
\item \emph{Ease of use.} Within the operator declarations of the input file (between {\ttfamily BeginOperator} and {\ttfamily EndOperator}) a simple flag {\ttfamily UseExternalInverter} is introduced which, when set to {\ttfamily quda}, will let QUDA perform the inversion of that operator. The operators {\ttfamily TMWILSON, WILSON, DBTMWILSON} and {\ttfamily CLOVER, DBCLOVER} are supported. In the HMC, the same flag can be used to offload solves for the \texttt{DET, DETRATIO, CLOVERDET, CLOVERDETRATIO, RAT, RATCOR, NDRAT, NDRATCOR, NDCLOVERRAT} and \texttt{NDCLOVERRATCOR} monomials.
\item \emph{Ease of use.} Within the operator declarations of the input file (between {\ttfamily BeginOperator} and {\ttfamily EndOperator}) a simple flag {\ttfamily UseExternalInverter} is introduced which, when set to {\ttfamily quda}, will let QUDA perform the inversion of that operator. The operators {\ttfamily TMWILSON, WILSON, DBTMWILSON} and {\ttfamily CLOVER, DBCLOVER} are supported.
Within the monomial declarations of the input file (between {\ttfamily BeginMonomial} and {\ttfamily EndMonomial}) the same flag can be used to offload solves for the \texttt{DET, DETRATIO, CLOVERDET, CLOVERDETRATIO, RAT, RATCOR, NDRAT, NDRATCOR, NDCLOVERRAT} and \texttt{NDCLOVERRATCOR} monomials in the HMC.
Further, the flag {\ttfamily UseExternalLibrary} is introduced which, when set to {\ttfamily quda}, will let QUDA perform the force calculation for the given monomial with support currently limited to {\ttfamily GAUGE, CLOVERDET, CLOVERDETRATIO}.
\item \emph{Minimality.} Minimal changes in the form of {\ttfamily \#ifdef QUDA} precompiler directives to the tmLQCD code base. The main bulk of the interface lies in a single separate file {\ttfamily quda\_interface.c} (with corresponding header file). The QUDA interface is entered .
\item \emph{Performance.} The higher priority of the previous items results in small performance detriments. In particular:
\begin{itemize}
Expand Down Expand Up @@ -68,6 +70,22 @@ \subsubsection{Installation}
\end{verbatim}
Note that a {\ttfamily C++} compiler is required for linking against the QUDA library, therefore set {\ttfamily CXX} appropriately. {\ttfamily \${QUDADIR}} is where you installed QUDA in the previous step and {\ttfamily \${CUDADIR}} is required again for linking.

\subsubsection{QUDA versions}

If you need a version of QUDA after \url{https://github.com/lattice/quda/commit/50864ffde1bd8f46fd4a2a2b2e6d44a5a588e2c2} you need to configure with
\begin{verbatim}
--enable-quda_experimental=yes
\end{verbatim}

If you need a version of QUDA before \url{https://github.com/lattice/quda/commit/fd50676db06fc36efb3a791a3059c57cca70bb55} you need to pass the following option to the configure script
\begin{verbatim}
--enable-quda_fermionic_forces=no
\end{verbatim}
so that the wrapper to the QUDA fermionic forces is not compiled.
Consequently, when tmLQCD has been configured with \texttt{--enable-quda\_fermionic\_forces=no}, setting {\ttfamily UseExternalLibrary = quda} in the input file for the {\ttfamily CLOVERDET, CLOVERDETRATIO} monomials
is not supported and tmLQCD will stop with an error.


\subsubsection{Usage}
Any main program that reads and handles the operator declaration from an input file can easily be set up to use the QUDA inverter by setting the {\ttfamily UseExternalInverter} flag to {\ttfamily quda}. For example, in the input file for the {\ttfamily invert} executable, add the flag to the operator declaration as
\begin{verbatim}
Expand Down
2 changes: 1 addition & 1 deletion global.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ EXTERN su3_32 ** g_gauge_field_copy_32;

EXTERN su3adj ** moment;
EXTERN su3adj ** df0;
EXTERN su3adj ** ddummy;
EXTERN su3adj ** ddummy, ** debug_derivative;

EXTERN int count00,count01,count10,count11,count20,count21;
EXTERN double g_kappa, g_c_sw, g_beta;
Expand Down
3 changes: 3 additions & 0 deletions include/tmlqcd_config_internal.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@
/* Using experimental QUDA version */
#undef TM_QUDA_EXPERIMENTAL

/* Using QUDA fermionic forces */
#undef TM_QUDA_FERMIONIC_FORCES

/* Using DDalphaAMG */
#undef DDalphaAMG

Expand Down
23 changes: 22 additions & 1 deletion init/init_moment_field.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include "su3adj.h"
#include "sse.h"

su3adj * mo=NULL, *df=NULL, *du=NULL;
su3adj * mo=NULL, *df=NULL, *du=NULL, *du_internal=NULL;

int init_moment_field(const int V, const int VR) {
int i = 0;
Expand Down Expand Up @@ -94,6 +94,27 @@ int init_moment_field(const int V, const int VR) {
ddummy[i] = ddummy[i-1]+4;
}

if(g_debug_level>3){
if((void*)(du_internal = (su3adj*)calloc(4*VR+1, sizeof(su3adj))) == NULL) {
printf ("malloc errno : %d\n",errno);
errno = 0;
return(5);
}
if((void*)(debug_derivative = (su3adj**)calloc(VR,sizeof(su3adj*))) == NULL) {
printf ("malloc errno : %d\n",errno);
errno = 0;
return(6);
}
#if ( defined SSE || defined SSE2 || defined SSE3)
debug_derivative[0] = (su3adj*)(((unsigned long int)(du_internal)+ALIGN_BASE)&~ALIGN_BASE);
#else
debug_derivative[0] = du_internal;
#endif

for(i = 1; i < VR; i++){
debug_derivative[i] = debug_derivative[i-1]+4;
}
}
return(0);
}

Expand Down
Loading

0 comments on commit 28295ac

Please sign in to comment.