diff --git a/tests/contract_ft_test.cpp b/tests/contract_ft_test.cpp index 64ddcdfa04..740adf6cc9 100644 --- a/tests/contract_ft_test.cpp +++ b/tests/contract_ft_test.cpp @@ -181,9 +181,8 @@ inline int launch_contract_test(const QudaContractType cType, const std::array(spinorX.data(), spinorY.data(), d_result.data(), cType, - src_colors, X.data(), source_position.data(), n_mom, mom.data(), fft_type.data()); + int faults = contractionFT_reference(spinorX.data(), spinorY.data(), d_result.data(), cType, src_colors, + X.data(), source_position.data(), n_mom, mom.data(), fft_type.data()); return faults; } diff --git a/tests/host_reference/contract_ft_reference.h b/tests/host_reference/contract_ft_reference.h index d1927a8255..e811d4ec99 100644 --- a/tests/host_reference/contract_ft_reference.h +++ b/tests/host_reference/contract_ft_reference.h @@ -126,8 +126,8 @@ void contractFTHost(void **h_prop_array_flavor_1, void **h_prop_array_flavor_2, for (int c1 = 0; c1 < src_colors; c1++) { // color contraction size_t off = nSpin * 3 * 2 * (Vh * parity + cb_idx); - contractColors(static_cast(h_prop_array_flavor_1[s1 * src_colors + c1]) + off, - static_cast(h_prop_array_flavor_2[s2 * src_colors + c1]) + off, nSpin, M.data()); + contractColors(static_cast(h_prop_array_flavor_1[s1 * src_colors + c1]) + off, + static_cast(h_prop_array_flavor_2[s2 * src_colors + c1]) + off, nSpin, M.data()); // apply gamma matrices here diff --git a/tests/host_reference/gauge_force_reference.cpp b/tests/host_reference/gauge_force_reference.cpp index 06efe3633b..741efdf0b7 100644 --- a/tests/host_reference/gauge_force_reference.cpp +++ b/tests/host_reference/gauge_force_reference.cpp @@ -437,26 +437,30 @@ void gauge_force_reference_dir(void *refMom, int dir, double eb3, quda::GaugeFie for (int i = 0; i < num_paths; i++) { if (prec == QUDA_DOUBLE_PRECISION) { double *my_loop_coeff = (double *)loop_coeff; - compute_path_product((dsu3_matrix *)staple, u_ex.data_array().data, path_dir[i], length[i], my_loop_coeff[i], - dir, lat); + compute_path_product((dsu3_matrix *)staple, u_ex.data_array().data, path_dir[i], length[i], + my_loop_coeff[i], dir, lat); } else { float *my_loop_coeff = (float *)loop_coeff; - compute_path_product((fsu3_matrix *)staple, u_ex.data_array().data, path_dir[i], length[i], my_loop_coeff[i], - dir, lat); + compute_path_product((fsu3_matrix *)staple, u_ex.data_array().data, path_dir[i], length[i], + my_loop_coeff[i], dir, lat); } } if (compute_force) { if (prec == QUDA_DOUBLE_PRECISION) { - update_mom((danti_hermitmat *)refMom, dir, u.data_array().data, (dsu3_matrix *)staple, (double)eb3, lat); + update_mom((danti_hermitmat *)refMom, dir, u.data_array().data, (dsu3_matrix *)staple, (double)eb3, + lat); } else { - update_mom((fanti_hermitmat *)refMom, dir, u.data_array().data, (fsu3_matrix *)staple, (float)eb3, lat); + update_mom((fanti_hermitmat *)refMom, dir, u.data_array().data, (fsu3_matrix *)staple, (float)eb3, + lat); } } else { if (prec == QUDA_DOUBLE_PRECISION) { - update_gauge((dsu3_matrix *)refMom, dir, u.data_array().data, (dsu3_matrix *)staple, (double)eb3, lat); + update_gauge((dsu3_matrix *)refMom, dir, u.data_array().data, (dsu3_matrix *)staple, (double)eb3, + lat); } else { - update_gauge((fsu3_matrix *)refMom, dir, u.data_array().data, (fsu3_matrix *)staple, (float)eb3, lat); + update_gauge((fsu3_matrix *)refMom, dir, u.data_array().data, (fsu3_matrix *)staple, (float)eb3, + lat); } } host_free(staple); @@ -472,12 +476,13 @@ void gauge_force_reference(void *refMom, double eb3, quda::GaugeField &u, int ** setGaugeParam(param); param.gauge_order = QUDA_QDP_GAUGE_ORDER; param.t_boundary = QUDA_PERIODIC_T; + auto qdp_ex = quda::createExtendedGauge(u.data_array().data, param, R); lattice_t lat(*qdp_ex); for (int dir = 0; dir < 4; dir++) { - gauge_force_reference_dir(refMom, dir, eb3, u, *qdp_ex, u.Precision(), path_dir[dir], length, loop_coeff, - num_paths, lat, compute_force); + gauge_force_reference_dir(refMom, dir, eb3, u, *qdp_ex, u.Precision(), path_dir[dir], length, loop_coeff, num_paths, + lat, compute_force); } delete qdp_ex; @@ -500,11 +505,13 @@ void gauge_loop_trace_reference(quda::GaugeField &u, std::vector for (int i = 0; i < num_paths; i++) { if (u.Precision() == QUDA_DOUBLE_PRECISION) { - dcomplex tr = compute_loop_trace(qdp_ex->data_array().data, input_path[i], length[i], path_coeff[i], lat); + dcomplex tr + = compute_loop_trace(qdp_ex->data_array().data, input_path[i], length[i], path_coeff[i], lat); loop_tr_dbl[2 * i] = factor * tr.real; loop_tr_dbl[2 * i + 1] = factor * tr.imag; } else { - dcomplex tr = compute_loop_trace(qdp_ex->data_array().data, input_path[i], length[i], path_coeff[i], lat); + dcomplex tr + = compute_loop_trace(qdp_ex->data_array().data, input_path[i], length[i], path_coeff[i], lat); loop_tr_dbl[2 * i] = factor * tr.real; loop_tr_dbl[2 * i + 1] = factor * tr.imag; } diff --git a/tests/host_reference/hisq_force_reference.cpp b/tests/host_reference/hisq_force_reference.cpp index 1d23485051..33d773ed95 100644 --- a/tests/host_reference/hisq_force_reference.cpp +++ b/tests/host_reference/hisq_force_reference.cpp @@ -756,7 +756,8 @@ void computeOneLinkSite( #else const int[], #endif - int half_lattice_index, const Real *const *const oprod, int sig, Real coeff, const LoadStore &ls, Real *const *const output) + int half_lattice_index, const Real *const *const oprod, int sig, Real coeff, const LoadStore &ls, + Real *const *const output) { if (GOES_FORWARDS(sig)) { typename ColorMatrix::Type colorMatW; @@ -836,13 +837,13 @@ void computeMiddleLinkSite(int half_lattice_index, // half_lattice_index to bett if (Qprev == NULL) { if (sig_positive) { - ls.loadMatrixFromField(static_cast(oprod), 1 - oddBit, sig, point_d, &colorMatY); + ls.loadMatrixFromField(static_cast(oprod), 1 - oddBit, sig, point_d, &colorMatY); } else { - ls.loadMatrixFromField(static_cast(oprod), oddBit, OPP_DIR(sig), point_c, &colorMatY); + ls.loadMatrixFromField(static_cast(oprod), oddBit, OPP_DIR(sig), point_c, &colorMatY); colorMatY = conj(colorMatY); } } else { // Qprev != NULL - ls.loadMatrixFromField(static_cast(oprod), oddBit, point_c, &colorMatY); + ls.loadMatrixFromField(static_cast(oprod), oddBit, point_c, &colorMatY); } colorMatW = (!mu_positive) ? bc_link * colorMatY : conj(bc_link) * colorMatY; @@ -973,8 +974,8 @@ void computeSideLinkSite(int half_lattice_index, // half_lattice_index to better template void computeSideLinkField(const int dim[4], const Real *const P3, const Real *const Qprod, // why? - const Real *const *const link, int sig, int mu, Real coeff, Real accumu_coeff, Real *const shortP, - Real *const *const newOprod) + const Real *const *const link, int sig, int mu, Real coeff, Real accumu_coeff, + Real *const shortP, Real *const *const newOprod) { // Need some way of setting half_volume int volume = 1; @@ -999,8 +1000,8 @@ void computeSideLinkField(const int dim[4], const Real *const P3, template void computeAllLinkSite(int half_lattice_index, // half_lattice_index to better match the GPU code. - const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const *const link, - int sig, int mu, Real coeff, Real accumu_coeff, + const int dim[4], const Real *const oprod, const Real *const Qprev, + const Real *const *const link, int sig, int mu, Real coeff, Real accumu_coeff, const LoadStore &ls, // pass a function object to read from and write to matrix fields Real *const shortP, Real *const *const newOprod) { @@ -1086,8 +1087,9 @@ void computeAllLinkSite(int half_lattice_index, // half_lattice_index to better } // allLinkKernel template -void computeAllLinkField(const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const *const link, - int sig, int mu, Real coeff, Real accumu_coeff, Real *const shortP, Real *const *const newOprod) +void computeAllLinkField(const int dim[4], const Real *const oprod, const Real *const Qprev, + const Real *const *const link, int sig, int mu, Real coeff, Real accumu_coeff, + Real *const shortP, Real *const *const newOprod) { int volume = 1; for (int dir = 0; dir < 4; ++dir) volume *= dim[dir]; @@ -1223,14 +1225,16 @@ void hisqStaplesForceCPU(const double *path_coeff, quda::GaugeField &oprod, quda if (precision == QUDA_SINGLE_PRECISION) { // allocate memory for temporary fields float *tempmat[6]; - for (int i = 0; i < 6; i++) { tempmat[i] = static_cast(safe_malloc(len * 18 * precision)); } - doHisqStaplesForceCPU(X_, act_path_coeff, oprod.data_array().data, link.data_array().data, tempmat, newOprod->data_array().data); + for (int i = 0; i < 6; i++) { tempmat[i] = static_cast(safe_malloc(len * 18 * precision)); } + doHisqStaplesForceCPU(X_, act_path_coeff, oprod.data_array().data, link.data_array().data, + tempmat, newOprod->data_array().data); for (int i = 0; i < 6; ++i) { host_free(tempmat[i]); } } else if (precision == QUDA_DOUBLE_PRECISION) { // allocate memory for temporary fields double *tempmat[6]; - for (int i = 0; i < 6; i++) { tempmat[i] = static_cast(safe_malloc(len * 18 * precision)); } - doHisqStaplesForceCPU(X_, act_path_coeff, oprod.data_array().data, link.data_array().data, tempmat, newOprod->data_array().data); + for (int i = 0; i < 6; i++) { tempmat[i] = static_cast(safe_malloc(len * 18 * precision)); } + doHisqStaplesForceCPU(X_, act_path_coeff, oprod.data_array().data, + link.data_array().data, tempmat, newOprod->data_array().data); for (int i = 0; i < 6; ++i) { host_free(tempmat[i]); } } else { errorQuda("Unsupported precision"); @@ -1238,8 +1242,9 @@ void hisqStaplesForceCPU(const double *path_coeff, quda::GaugeField &oprod, quda } template -void computeLongLinkSite(int half_lattice_index, const int dim[4], const Real *const *const oprod, const Real *const *const link, - int sig, Real coeff, const LoadStore &ls, Real *const *const output) +void computeLongLinkSite(int half_lattice_index, const int dim[4], const Real *const *const oprod, + const Real *const *const link, int sig, Real coeff, const LoadStore &ls, + Real *const *const output) { if (GOES_FORWARDS(sig)) { @@ -1286,8 +1291,8 @@ void computeLongLinkSite(int half_lattice_index, const int dim[4], const Real *c } template -void computeLongLinkField(const int dim[4], const Real *const *const oprod, const Real *const *const link, int sig, Real coeff, - Real *const *const output) +void computeLongLinkField(const int dim[4], const Real *const *const oprod, const Real *const *const link, int sig, + Real coeff, Real *const *const output) { int volume = 1; for (int dir = 0; dir < 4; ++dir) volume *= dim[dir]; @@ -1313,9 +1318,11 @@ void hisqLongLinkForceCPU(double coeff, quda::GaugeField &oprod, quda::GaugeFiel for (int sig = 0; sig < 4; ++sig) { if (precision == QUDA_SINGLE_PRECISION) { - computeLongLinkField(X_, oprod.data_array().data, link.data_array().data, sig, coeff, newOprod->data_array().data); + computeLongLinkField(X_, oprod.data_array().data, link.data_array().data, sig, coeff, + newOprod->data_array().data); } else if (precision == QUDA_DOUBLE_PRECISION) { - computeLongLinkField(X_, oprod.data_array().data, link.data_array().data, sig, coeff, newOprod->data_array().data); + computeLongLinkField(X_, oprod.data_array().data, link.data_array().data, sig, coeff, + newOprod->data_array().data); } else { errorQuda("Unrecognised precision"); } @@ -1329,8 +1336,8 @@ void completeForceSite(int half_lattice_index, #else const int[], #endif - const Real *const *const oprod, const Real *const *const link, int sig, const LoadStore &ls, - Real *const mom) + const Real *const *const oprod, const Real *const *const link, int sig, + const LoadStore &ls, Real *const mom) { typename ColorMatrix::Type colorMatX, colorMatY, linkW; @@ -1351,7 +1358,8 @@ void completeForceSite(int half_lattice_index, } template -void completeForceField(const int dim[4], const Real *const *const oprod, const Real *const *const link, int sig, Real *const mom) +void completeForceField(const int dim[4], const Real *const *const oprod, const Real *const *const link, int sig, + Real *const mom) { int volume = dim[0] * dim[1] * dim[2] * dim[3]; const int half_volume = volume / 2; @@ -1371,9 +1379,11 @@ void hisqCompleteForceCPU(quda::GaugeField &oprod, quda::GaugeField &link, quda: for (int sig = 0; sig < 4; ++sig) { if (precision == QUDA_SINGLE_PRECISION) { - completeForceField(X_, oprod.data_array().data, link.data_array().data, sig, mom->data()); + completeForceField(X_, oprod.data_array().data, link.data_array().data, sig, + mom->data()); } else if (precision == QUDA_DOUBLE_PRECISION) { - completeForceField(X_, oprod.data_array().data, link.data_array().data, sig, mom->data()); + completeForceField(X_, oprod.data_array().data, link.data_array().data, sig, + mom->data()); } else { errorQuda("Unrecognised precision"); } diff --git a/tests/host_reference/staggered_dslash_reference.cpp b/tests/host_reference/staggered_dslash_reference.cpp index 2e7e4e0f8a..eb7eb3d604 100644 --- a/tests/host_reference/staggered_dslash_reference.cpp +++ b/tests/host_reference/staggered_dslash_reference.cpp @@ -146,14 +146,14 @@ void stag_dslash(ColorSpinorField &out, const GaugeField &fat_link, const GaugeF long_link.Ghost()[3].data()}; if (in.Precision() == QUDA_DOUBLE_PRECISION) { - staggeredDslashReference(out.data(), reinterpret_cast(qdp_fatlink), + staggeredDslashReference(out.data(), reinterpret_cast(qdp_fatlink), reinterpret_cast(qdp_longlink), reinterpret_cast(ghost_fatlink), reinterpret_cast(ghost_longlink), in.data(), reinterpret_cast(in.fwdGhostFaceBuffer), reinterpret_cast(in.backGhostFaceBuffer), oddBit, daggerBit, dslash_type, laplace3D); } else if (in.Precision() == QUDA_SINGLE_PRECISION) { - staggeredDslashReference(out.data(), reinterpret_cast(qdp_fatlink), + staggeredDslashReference(out.data(), reinterpret_cast(qdp_fatlink), reinterpret_cast(qdp_longlink), reinterpret_cast(ghost_fatlink), reinterpret_cast(ghost_longlink), in.data(), reinterpret_cast(in.fwdGhostFaceBuffer), diff --git a/tests/laph_test.cpp b/tests/laph_test.cpp index 647daa3c1a..75f244d406 100644 --- a/tests/laph_test.cpp +++ b/tests/laph_test.cpp @@ -114,8 +114,8 @@ auto laph_test(test_t param) std::vector qudaRes(nSink * nEv * Lt * nSpin, 0.); int X[4] = {xdim, ydim, zdim, tdim}; - laphSinkProject((__complex__ double *)qudaRes.data(), snkPtr.data(), nSink, tileSink, - evPtr.data(), nEv, tileEv, &invParam, X); + laphSinkProject((__complex__ double *)qudaRes.data(), snkPtr.data(), nSink, tileSink, evPtr.data(), nEv, tileEv, + &invParam, X); printfQuda("laphSinkProject Done: %g secs, %g Gflops\n", invParam.secs, invParam.gflops / invParam.secs); auto tol = getTolerance(cuda_prec);