diff --git a/domain/include/cstone/domain/domain.hpp b/domain/include/cstone/domain/domain.hpp index 52c5e2618..fc6997f6d 100644 --- a/domain/include/cstone/domain/domain.hpp +++ b/domain/include/cstone/domain/domain.hpp @@ -264,7 +264,10 @@ class Domain gatherArrays(sorter.gatherFunc(), sorter.getMap() + global_.numSendDown(), global_.numAssigned(), exchangeStart, 0, std::tie(x, y, z, h, m), util::reverse(scratch)); - float invThetaEff = invThetaVecMac(theta_); + // invThetaEff needs to be more strict than the worst case vec mac, because if the tree is reused multiple + // times, an expansion center might have moved outside the cell, which can mark remote cells as halos on ranks + // that won't be found by findPeers + float invThetaEff = invThetaVecMac(theta_) * haloSearchExt_; std::vector peers = findPeersMac(myRank_, global_.assignment(), global_.octree(), box(), invThetaEff); if (firstCall_) diff --git a/main/src/init/evrard_init.hpp b/main/src/init/evrard_init.hpp index 52765d6ee..362596054 100644 --- a/main/src/init/evrard_init.hpp +++ b/main/src/init/evrard_init.hpp @@ -71,6 +71,8 @@ void initEvrardFields(Dataset& d, const std::map& constants std::fill(d.y_m1.begin(), d.y_m1.end(), 0.0); std::fill(d.z_m1.begin(), d.z_m1.end(), 0.0); + generateParticleIDs(d.id); + auto cv = sph::idealGasCv(d.muiConst, d.gamma); auto temp0 = constants.at("u0") / cv; std::fill(d.temp.begin(), d.temp.end(), temp0); diff --git a/main/src/init/gresho_chan.hpp b/main/src/init/gresho_chan.hpp index 466105700..48273dfc5 100644 --- a/main/src/init/gresho_chan.hpp +++ b/main/src/init/gresho_chan.hpp @@ -73,9 +73,10 @@ void initGreshoChanFields(Dataset& d, const std::map& setti std::fill(d.h.begin(), d.h.end(), hInit); std::fill(d.mui.begin(), d.mui.end(), d.muiConst); std::fill(d.alpha.begin(), d.alpha.end(), d.alphamin); - std::fill(d.z_m1.begin(), d.z_m1.end(), 0.0); + generateParticleIDs(d.id); + #pragma omp parallel for schedule(static) for (size_t i = 0; i < d.x.size(); ++i) { diff --git a/main/src/init/isobaric_cube_init.hpp b/main/src/init/isobaric_cube_init.hpp index 6ce3e7b83..b7ad39a94 100644 --- a/main/src/init/isobaric_cube_init.hpp +++ b/main/src/init/isobaric_cube_init.hpp @@ -90,6 +90,8 @@ void initIsobaricCubeFields(Dataset& d, const std::map& con std::fill(d.vy.begin(), d.vy.end(), 0.0); std::fill(d.vz.begin(), d.vz.end(), 0.0); + generateParticleIDs(d.id); + #pragma omp parallel for schedule(static) for (size_t i = 0; i < d.x.size(); i++) { diff --git a/main/src/init/kelvin_helmholtz_init.hpp b/main/src/init/kelvin_helmholtz_init.hpp index d1842c971..f0baa12ce 100644 --- a/main/src/init/kelvin_helmholtz_init.hpp +++ b/main/src/init/kelvin_helmholtz_init.hpp @@ -77,6 +77,8 @@ void initKelvinHelmholtzFields(Dataset& d, const std::map& std::fill(d.alpha.begin(), d.alpha.end(), d.alphamax); std::fill(d.vz.begin(), d.vz.end(), 0.0); + generateParticleIDs(d.id); + auto cv = sph::idealGasCv(d.muiConst, gamma); #pragma omp parallel for schedule(static) diff --git a/main/src/init/noh_init.hpp b/main/src/init/noh_init.hpp index 090b5feb0..e07591205 100644 --- a/main/src/init/noh_init.hpp +++ b/main/src/init/noh_init.hpp @@ -83,6 +83,8 @@ void initNohFields(Dataset& d, const std::map& constants) std::fill(d.temp.begin(), d.temp.end(), temp0); std::fill(d.alpha.begin(), d.alpha.end(), d.alphamin); + generateParticleIDs(d.id); + #pragma omp parallel for schedule(static) for (size_t i = 0; i < d.x.size(); i++) { diff --git a/main/src/init/sedov_init.hpp b/main/src/init/sedov_init.hpp index 05a54d826..d5596c64a 100644 --- a/main/src/init/sedov_init.hpp +++ b/main/src/init/sedov_init.hpp @@ -73,6 +73,8 @@ void initSedovFields(Dataset& d, const std::map& constants) std::fill(d.y_m1.begin(), d.y_m1.end(), 0.0); std::fill(d.z_m1.begin(), d.z_m1.end(), 0.0); + generateParticleIDs(d.id); + auto cv = sph::idealGasCv(d.muiConst, d.gamma); // If temperature is not allocated, we can still use this initializer for just the coordinates diff --git a/main/src/init/turbulence_init.hpp b/main/src/init/turbulence_init.hpp index 32038f1db..20a2f5ad5 100644 --- a/main/src/init/turbulence_init.hpp +++ b/main/src/init/turbulence_init.hpp @@ -94,6 +94,8 @@ void initTurbulenceHydroFields(Dataset& d, const std::map& std::fill(d.x_m1.begin(), d.x_m1.end(), 0.); std::fill(d.y_m1.begin(), d.y_m1.end(), 0.); std::fill(d.z_m1.begin(), d.z_m1.end(), 0.); + + generateParticleIDs(d.id); } template diff --git a/main/src/init/utils.hpp b/main/src/init/utils.hpp index fb4cdbef6..d4b321cc5 100644 --- a/main/src/init/utils.hpp +++ b/main/src/init/utils.hpp @@ -37,6 +37,7 @@ #include "cstone/primitives/gather.hpp" #include "cstone/sfc/sfc.hpp" +#include "cstone/util/gsl-lite.hpp" #include "io/ifile_io.hpp" #include "init/settings.hpp" @@ -120,6 +121,25 @@ void readFileAttributes(InitSettings& settings, const std::string& settingsFile, } } +//! @brief generate particle IDs at the beginning of the simulation initialization +void generateParticleIDs(gsl::span id) +{ + int rank = 0, numRanks = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &numRanks); + + std::vector ranksLocalParticles(numRanks); + size_t localNumRanks = id.size(); + + // fill ranksLocalParticles with the number of particles per rank + MPI_Allgather(&localNumRanks, 1, MpiType{}, ranksLocalParticles.data(), 1, MpiType{}, + MPI_COMM_WORLD); + + std::exclusive_scan(ranksLocalParticles.begin(), ranksLocalParticles.end(), ranksLocalParticles.begin(), + uint64_t(0)); + std::iota(id.begin(), id.end(), ranksLocalParticles[rank]); +} + //! @brief Used to read the default values of dataset attributes class BuiltinReader { diff --git a/main/src/init/wind_shock_init.hpp b/main/src/init/wind_shock_init.hpp index df2111a63..2ada01198 100644 --- a/main/src/init/wind_shock_init.hpp +++ b/main/src/init/wind_shock_init.hpp @@ -81,6 +81,8 @@ void initWindShockFields(Dataset& d, const std::map& consta std::fill(d.mui.begin(), d.mui.end(), d.muiConst); std::fill(d.alpha.begin(), d.alpha.end(), d.alphamin); + generateParticleIDs(d.id); + T uInt = uExt / (rhoInt / rhoExt); T k = d.ngmax / r; diff --git a/main/src/propagator/std_hydro.hpp b/main/src/propagator/std_hydro.hpp index 47a9b39d2..42b5caa26 100644 --- a/main/src/propagator/std_hydro.hpp +++ b/main/src/propagator/std_hydro.hpp @@ -70,7 +70,7 @@ class HydroProp : public Propagator * * x, y, z, h and m are automatically considered conserved and must not be specified in this list */ - using ConservedFields = FieldList<"temp", "vx", "vy", "vz", "x_m1", "y_m1", "z_m1", "du_m1">; + using ConservedFields = FieldList<"temp", "vx", "vy", "vz", "x_m1", "y_m1", "z_m1", "du_m1", "id">; //! @brief the list of dependent particle fields, these may be used as scratch space during domain sync using DependentFields = diff --git a/main/src/propagator/std_hydro_grackle.hpp b/main/src/propagator/std_hydro_grackle.hpp index 8d9225cfd..fef90608c 100644 --- a/main/src/propagator/std_hydro_grackle.hpp +++ b/main/src/propagator/std_hydro_grackle.hpp @@ -69,7 +69,7 @@ class HydroGrackleProp final : public HydroProp * * x, y, z, h and m are automatically considered conserved and must not be specified in this list */ - using ConservedFields = FieldList<"u", "vx", "vy", "vz", "x_m1", "y_m1", "z_m1", "du_m1">; + using ConservedFields = FieldList<"u", "vx", "vy", "vz", "x_m1", "y_m1", "z_m1", "du_m1", "id">; //! @brief the list of dependent particle fields, these may be used as scratch space during domain sync using DependentFields = diff --git a/main/src/propagator/ve_hydro.hpp b/main/src/propagator/ve_hydro.hpp index f2c916710..54d58f792 100644 --- a/main/src/propagator/ve_hydro.hpp +++ b/main/src/propagator/ve_hydro.hpp @@ -71,7 +71,7 @@ class HydroVeProp : public Propagator * * x, y, z, h and m are automatically considered conserved and must not be specified in this list */ - using ConservedFields = FieldList<"temp", "vx", "vy", "vz", "x_m1", "y_m1", "z_m1", "du_m1", "alpha">; + using ConservedFields = FieldList<"temp", "vx", "vy", "vz", "x_m1", "y_m1", "z_m1", "du_m1", "alpha", "id">; //! @brief list of dependent fields, these may be used as scratch space during domain sync using DependentFields_ = diff --git a/main/src/propagator/ve_hydro_bdt.hpp b/main/src/propagator/ve_hydro_bdt.hpp index 52090ce0b..041c6b861 100644 --- a/main/src/propagator/ve_hydro_bdt.hpp +++ b/main/src/propagator/ve_hydro_bdt.hpp @@ -91,7 +91,7 @@ class HydroVeBdtProp : public Propagator * * x, y, z, h and m are automatically considered conserved and must not be specified in this list */ - using ConservedFields = FieldList<"temp", "vx", "vy", "vz", "x_m1", "y_m1", "z_m1", "du_m1", "alpha", "rung">; + using ConservedFields = FieldList<"temp", "vx", "vy", "vz", "x_m1", "y_m1", "z_m1", "du_m1", "alpha", "rung", "id">; //! @brief list of dependent fields, these may be used as scratch space during domain sync using DependentFields_ = FieldList<"ax", "ay", "az", "prho", "c", "du", "c11", "c12", "c13", "c22", "c23", "c33", diff --git a/sph/include/sph/particles_data.hpp b/sph/include/sph/particles_data.hpp index 02e0768a0..ea0612d4b 100644 --- a/sph/include/sph/particles_data.hpp +++ b/sph/include/sph/particles_data.hpp @@ -230,6 +230,7 @@ class ParticlesData : public cstone::FieldStates> FieldVector nc; // number of neighbors of each particle FieldVector dV11, dV12, dV13, dV22, dV23, dV33; // Velocity gradient components FieldVector rung; // rung per particle of previous timestep + FieldVector id; // unique particle id //! @brief Indices of neighbors for each particle, length is number of assigned particles * ngmax. CPU version only. std::vector neighbors; @@ -247,7 +248,7 @@ class ParticlesData : public cstone::FieldStates> "x", "y", "z", "x_m1", "y_m1", "z_m1", "vx", "vy", "vz", "rho", "u", "p", "prho", "tdpdTrho", "h", "m", "c", "ax", "ay", "az", "du", "du_m1", "c11", "c12", "c13", "c22", "c23", "c33", "mue", "mui", "temp", "cv", "xm", "kx", "divv", "curlv", - "alpha", "gradh", "keys", "nc", "dV11", "dV12", "dV13", "dV22", "dV23", "dV33", "rung"}; + "alpha", "gradh", "keys", "nc", "dV11", "dV12", "dV13", "dV22", "dV23", "dV33", "rung", "id"}; //! @brief dataset prefix to be prepended to fieldNames for structured output static const inline std::string prefix{}; @@ -263,7 +264,7 @@ class ParticlesData : public cstone::FieldStates> { auto ret = std::tie(x, y, z, x_m1, y_m1, z_m1, vx, vy, vz, rho, u, p, prho, tdpdTrho, h, m, c, ax, ay, az, du, du_m1, c11, c12, c13, c22, c23, c33, mue, mui, temp, cv, xm, kx, divv, curlv, alpha, gradh, - keys, nc, dV11, dV12, dV13, dV22, dV23, dV33, rung); + keys, nc, dV11, dV12, dV13, dV22, dV23, dV33, rung, id); #if defined(__clang__) || __GNUC__ > 11 static_assert(std::tuple_size_v == fieldNames.size()); #endif diff --git a/sph/include/sph/particles_data_gpu.cuh b/sph/include/sph/particles_data_gpu.cuh index 5283a8442..0f6d7e532 100644 --- a/sph/include/sph/particles_data_gpu.cuh +++ b/sph/include/sph/particles_data_gpu.cuh @@ -102,6 +102,7 @@ public: DevVector nc; // number of neighbors of each particle DevVector dV11, dV12, dV13, dV22, dV23, dV33; // Velocity gradient components DevVector rung; // rung per particle of previous timestep + DevVector id; // unique particle id //! @brief SPH interpolation kernel lookup tables DevVector wh, whd; @@ -118,7 +119,7 @@ public: "x", "y", "z", "x_m1", "y_m1", "z_m1", "vx", "vy", "vz", "rho", "u", "p", "prho", "tdpdTrho", "h", "m", "c", "ax", "ay", "az", "du", "du_m1", "c11", "c12", "c13", "c22", "c23", "c33", "mue", "mui", "temp", "cv", "xm", "kx", "divv", "curlv", - "alpha", "gradh", "keys", "nc", "dV11", "dV12", "dV13", "dV22", "dV23", "dV33", "rung"}; + "alpha", "gradh", "keys", "nc", "dV11", "dV12", "dV13", "dV22", "dV23", "dV33", "rung", "id"}; /*! @brief return a tuple of field references * @@ -128,7 +129,7 @@ public: { auto ret = std::tie(x, y, z, x_m1, y_m1, z_m1, vx, vy, vz, rho, u, p, prho, tdpdTrho, h, m, c, ax, ay, az, du, du_m1, c11, c12, c13, c22, c23, c33, mue, mui, temp, cv, xm, kx, divv, curlv, alpha, gradh, - keys, nc, dV11, dV12, dV13, dV22, dV23, dV33, rung); + keys, nc, dV11, dV12, dV13, dV22, dV23, dV33, rung, id); static_assert(std::tuple_size_v == fieldNames.size()); return ret; diff --git a/sph/include/sph/positions.hpp b/sph/include/sph/positions.hpp index f0c1e9a28..1d30edd51 100644 --- a/sph/include/sph/positions.hpp +++ b/sph/include/sph/positions.hpp @@ -124,6 +124,7 @@ void updatePositionsHost(size_t startIndex, size_t endIndex, Dataset& d, const c template void updateTempHost(size_t startIndex, size_t endIndex, Dataset& d) { + using Tdu = decltype(d.du[0]); bool haveMui = !d.mui.empty(); auto constCv = idealGasCv(d.muiConst, d.gamma); @@ -132,7 +133,7 @@ void updateTempHost(size_t startIndex, size_t endIndex, Dataset& d) { auto cv = haveMui ? idealGasCv(d.mui[i], d.gamma) : constCv; auto u_old = cv * d.temp[i]; - d.temp[i] = energyUpdate(u_old, d.minDt, d.minDt_m1, d.du[i], d.du_m1[i]) / cv; + d.temp[i] = energyUpdate(u_old, d.minDt, d.minDt_m1, d.du[i], Tdu(d.du_m1[i])) / cv; d.du_m1[i] = d.du[i]; } } @@ -140,10 +141,11 @@ void updateTempHost(size_t startIndex, size_t endIndex, Dataset& d) template void updateIntEnergyHost(size_t startIndex, size_t endIndex, Dataset& d) { + using Tdu = decltype(d.du[0]); #pragma omp parallel for schedule(static) for (size_t i = startIndex; i < endIndex; i++) { - d.u[i] = energyUpdate(d.u[i], d.minDt, d.minDt_m1, d.du[i], d.du_m1[i]); + d.u[i] = energyUpdate(d.u[i], d.minDt, d.minDt_m1, d.du[i], Tdu(d.du_m1[i])); d.du_m1[i] = d.du[i]; } }