fix(cc): fix message passing when nghost is 0 and send list is empty #4237

Merged · 9 commits · Oct 23, 2024
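Summary of the change, as reflected in the diffs below: with message passing enabled (do_message_passing == 1), the C++ PyTorch backend used to skip the communication setup whenever nghost == 0 and substitute an arange(nloc_real) mapping instead. That left border_op exposed to a null communicator and empty send lists, the known segfault previously skipped in test_lammps_dpa_pt_nopbc.py. This PR takes the message-passing branch unconditionally and guards the hazardous cases individually: a null lmp_list.world, an uninitialized MPI environment, nsend == 0, and nswap == 0.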
22 changes: 10 additions & 12 deletions source/api_cc/src/DeepPotPT.cc
@@ -168,7 +168,7 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
     nlist_data.copy_from_nlist(lmp_list);
     nlist_data.shuffle_exclude_empty(fwd_map);
     nlist_data.padding();
-    if (do_message_passing == 1 && nghost > 0) {
+    if (do_message_passing == 1) {
       int nswap = lmp_list.nswap;
       torch::Tensor sendproc_tensor =
           torch::from_blob(lmp_list.sendproc, {nswap}, int32_option);
@@ -180,10 +180,14 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
           torch::from_blob(lmp_list.recvnum, {nswap}, int32_option);
       torch::Tensor sendnum_tensor =
           torch::from_blob(lmp_list.sendnum, {nswap}, int32_option);
-      torch::Tensor communicator_tensor = torch::from_blob(
-          const_cast<void*>(lmp_list.world), {1}, torch::kInt64);
-      // torch::Tensor communicator_tensor =
-      //     torch::tensor(lmp_list.world, int32_option);
+      torch::Tensor communicator_tensor;
+      if (lmp_list.world == 0) {
+        communicator_tensor = torch::empty({1}, torch::kInt64);
+      } else {
+        communicator_tensor = torch::from_blob(
+            const_cast<void*>(lmp_list.world), {1}, torch::kInt64);
+      }
+
       torch::Tensor nswap_tensor = torch::tensor(nswap, int32_option);
       int total_send =
           std::accumulate(lmp_list.sendnum, lmp_list.sendnum + nswap, 0);
@@ -196,12 +200,6 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
       comm_dict.insert("recv_num", recvnum_tensor);
       comm_dict.insert("communicator", communicator_tensor);
     }
-    if (do_message_passing == 1 && nghost == 0) {
-      // for the situation that no ghost atoms (e.g. serial nopbc)
-      // set the mapping arange(nloc) is enough
-      auto option = torch::TensorOptions().device(device).dtype(torch::kInt64);
-      mapping_tensor = at::arange(nloc_real, option).unsqueeze(0);
-    }
   }
   at::Tensor firstneigh = createNlistTensor(nlist_data.jlist);
   firstneigh_tensor = firstneigh.to(torch::kInt64).to(device);
@@ -224,7 +222,7 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
              .to(device);
   }
   c10::Dict<c10::IValue, c10::IValue> outputs =
-      (do_message_passing == 1 && nghost > 0)
+      (do_message_passing == 1)
           ? module
                 .run_method("forward_lower", coord_wrapped_Tensor, atype_Tensor,
                             firstneigh_tensor, mapping_tensor, fparam_tensor,
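The core of the DeepPotPT.cc change is the communicator guard. A minimal sketch of the same pattern as a standalone helper (hypothetical name make_communicator_tensor; the PR inlines this logic in DeepPotPT::compute):

```cpp
#include <torch/torch.h>

// Hypothetical helper mirroring the guard added above: wrap the MPI
// communicator handle only when one exists. In serial runs
// lmp_list.world is 0, and from_blob would alias a null pointer.
torch::Tensor make_communicator_tensor(const void* world) {
  if (world == nullptr) {
    // placeholder tensor that owns its storage, safe to pass downstream
    return torch::empty({1}, torch::kInt64);
  }
  // zero-copy view of the caller-owned communicator handle
  return torch::from_blob(const_cast<void*>(world), {1}, torch::kInt64);
}
```

The asymmetry matters: torch::empty allocates storage the tensor owns, while torch::from_blob only aliases the caller's pointer, so the null case cannot be expressed with from_blob alone.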
81 changes: 0 additions & 81 deletions source/api_cc/tests/test_deeppot_dpa_pt.cc
@@ -310,84 +310,3 @@ TYPED_TEST(TestInferDeepPotDpaPtNopbc, cpu_build_nlist_atomic) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 }
-
-TYPED_TEST(TestInferDeepPotDpaPtNopbc, cpu_lmp_nlist) {
-  using VALUETYPE = TypeParam;
-  std::vector<VALUETYPE>& coord = this->coord;
-  std::vector<int>& atype = this->atype;
-  std::vector<VALUETYPE>& box = this->box;
-  std::vector<VALUETYPE>& expected_e = this->expected_e;
-  std::vector<VALUETYPE>& expected_f = this->expected_f;
-  std::vector<VALUETYPE>& expected_v = this->expected_v;
-  int& natoms = this->natoms;
-  double& expected_tot_e = this->expected_tot_e;
-  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
-  deepmd::DeepPot& dp = this->dp;
-  double ener;
-  std::vector<VALUETYPE> force, virial;
-
-  std::vector<std::vector<int> > nlist_data = {
-      {1, 2, 3, 4, 5}, {0, 2, 3, 4, 5}, {0, 1, 3, 4, 5},
-      {0, 1, 2, 4, 5}, {0, 1, 2, 3, 5}, {0, 1, 2, 3, 4}};
-  std::vector<int> ilist(natoms), numneigh(natoms);
-  std::vector<int*> firstneigh(natoms);
-  deepmd::InputNlist inlist(natoms, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);
-  dp.compute(ener, force, virial, coord, atype, box, 0, inlist, 0);
-
-  EXPECT_EQ(force.size(), natoms * 3);
-  EXPECT_EQ(virial.size(), 9);
-
-  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for (int ii = 0; ii < natoms * 3; ++ii) {
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
-  }
-  for (int ii = 0; ii < 3 * 3; ++ii) {
-    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
-  }
-}
-
-TYPED_TEST(TestInferDeepPotDpaPtNopbc, cpu_lmp_nlist_atomic) {
-  using VALUETYPE = TypeParam;
-  std::vector<VALUETYPE>& coord = this->coord;
-  std::vector<int>& atype = this->atype;
-  std::vector<VALUETYPE>& box = this->box;
-  std::vector<VALUETYPE>& expected_e = this->expected_e;
-  std::vector<VALUETYPE>& expected_f = this->expected_f;
-  std::vector<VALUETYPE>& expected_v = this->expected_v;
-  int& natoms = this->natoms;
-  double& expected_tot_e = this->expected_tot_e;
-  std::vector<VALUETYPE>& expected_tot_v = this->expected_tot_v;
-  deepmd::DeepPot& dp = this->dp;
-  double ener;
-  std::vector<VALUETYPE> force, virial, atom_ener, atom_vir;
-
-  std::vector<std::vector<int> > nlist_data = {
-      {1, 2, 3, 4, 5}, {0, 2, 3, 4, 5}, {0, 1, 3, 4, 5},
-      {0, 1, 2, 4, 5}, {0, 1, 2, 3, 5}, {0, 1, 2, 3, 4}};
-  std::vector<int> ilist(natoms), numneigh(natoms);
-  std::vector<int*> firstneigh(natoms);
-  deepmd::InputNlist inlist(natoms, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);
-  dp.compute(ener, force, virial, atom_ener, atom_vir, coord, atype, box, 0,
-             inlist, 0);
-
-  EXPECT_EQ(force.size(), natoms * 3);
-  EXPECT_EQ(virial.size(), 9);
-  EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms * 9);
-
-  EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for (int ii = 0; ii < natoms * 3; ++ii) {
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
-  }
-  for (int ii = 0; ii < 3 * 3; ++ii) {
-    EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
-  }
-  for (int ii = 0; ii < natoms; ++ii) {
-    EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
-  }
-  for (int ii = 0; ii < natoms * 9; ++ii) {
-    EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
-  }
-}
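The two cpu_lmp_nlist tests above are removed outright rather than updated, presumably because the nopbc LAMMPS-neighbor-list path they exercised now flows through the message-passing branch, which these communicator-less fixtures did not set up; the cpu_build_nlist tests for the same fixture remain.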
8 changes: 0 additions & 8 deletions source/lmp/tests/test_lammps_dpa_pt_nopbc.py
@@ -681,14 +681,6 @@ def test_pair_deepmd_si(lammps_si):
     [(["--balance"],), ([],)],
 )
 def test_pair_deepmd_mpi(balance_args: list):
-    if balance_args == []:
-        # python:5331 terminated with signal 11 at PC=7f3e940e3806 SP=7ffd5787edc0. Backtrace:
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x95806)[0x7f3e940e3806]
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x8f76e)[0x7f3e940dd76e]
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x9a38a)[0x7f3e940e838a]
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(_Z9border_opRKN2at6TensorES2_S2_S2_S2_S2_S2_S2_S2_+0x8e)[0x7f3e940dda63]
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0xaeac3)[0x7f3e940fcac3]
-        pytest.skip(reason="Known segfault, see comments for details")
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
             [
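With the nghost == 0 path fixed in the C++ backend, the unconditional skip above (recording a segfault whose backtrace runs through border_op in libdeepmd_op_pt.so) is removed, and test_pair_deepmd_mpi now runs for both the --balance and the plain parameterizations.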
63 changes: 37 additions & 26 deletions source/op/pt/comm.cc
@@ -87,16 +87,18 @@ class Border : public torch::autograd::Function<Border> {
     int mpi_init = 0;
     MPI_Initialized(&mpi_init);
     int cuda_aware = 1;
-    int me;
+    int me = 0;
     MPI_Comm world;
     int world_size = 0;
-    unpack_communicator(communicator_tensor, world);
-    MPI_Comm_rank(world, &me);
-    MPI_Comm_size(world, &world_size);
+    if (mpi_init) {
+      unpack_communicator(communicator_tensor, world);
+      MPI_Comm_rank(world, &me);
+      MPI_Comm_size(world, &world_size);
+    }
     MPI_Datatype mpi_type = get_mpi_type<FPTYPE>();
     MPI_Request request;
 #if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
-    if (world_size != 1) {
+    if (world_size >= 1) {
       int version, subversion;
       MPI_Get_version(&version, &subversion);
       if (version >= 4) {
@@ -120,11 +122,15 @@
     for (int iswap = 0; iswap < nswap; ++iswap) {
       int nrecv = recvnum[iswap];
       int nsend = sendnum[iswap];
-      torch::Tensor isendlist =
-          torch::from_blob(sendlist[iswap], {nsend}, int32_options)
-              .to(recv_g1_tensor.device());
-      torch::Tensor send_g1_tensor = recv_g1_tensor.index_select(0, isendlist);
-      FPTYPE* send_g1 = send_g1_tensor.data_ptr<FPTYPE>();
+      torch::Tensor isendlist;
+      torch::Tensor send_g1_tensor;
+      FPTYPE* send_g1;
+      if (nsend != 0) {
+        isendlist = torch::from_blob(sendlist[iswap], {nsend}, int32_options)
+                        .to(recv_g1_tensor.device());
+        send_g1_tensor = recv_g1_tensor.index_select(0, isendlist);
+        send_g1 = send_g1_tensor.data_ptr<FPTYPE>();
+      }
 #ifdef USE_MPI
       if (sendproc[iswap] != me) {
         if (nrecv) {
@@ -207,15 +213,17 @@ class Border : public torch::autograd::Function<Border> {
     MPI_Initialized(&mpi_init);
     int world_size = 0;
     int cuda_aware = 1;
+    int me = 0;
     MPI_Comm world;
-    unpack_communicator(communicator_tensor, world);
-    int me;
-    MPI_Comm_rank(world, &me);
-    MPI_Comm_size(world, &world_size);
+    if (mpi_init) {
+      unpack_communicator(communicator_tensor, world);
+      MPI_Comm_rank(world, &me);
+      MPI_Comm_size(world, &world_size);
+    }
     MPI_Datatype mpi_type = get_mpi_type<FPTYPE>();
     MPI_Request request;
 #if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
-    if (world_size != 1) {
+    if (world_size >= 1) {
       int version, subversion;
       MPI_Get_version(&version, &subversion);
       if (version >= 4) {
@@ -248,17 +256,20 @@ class Border : public torch::autograd::Function<Border> {
     int nlocal = nlocal_tensor.item<int>();
     int nghost = nghost_tensor.item<int>();
     int ntotal = nlocal + nghost;
-
-    torch::Tensor send_g1_tensor = d_local_g1_tensor;
-
-    int max_recvnum = sendnum_tensor.max().item<int>();
-    auto options = torch::TensorOptions()
-                       .dtype(d_local_g1_tensor.dtype())
-                       .device(d_local_g1_tensor.device());
-    torch::Tensor recv_g1_tensor =
-        torch::empty({max_recvnum, tensor_size}, options);
-    FPTYPE* recv_g1 = recv_g1_tensor.data_ptr<FPTYPE>();
-    FPTYPE* send_g1 = send_g1_tensor.data_ptr<FPTYPE>() + ntotal * tensor_size;
+    torch::Tensor send_g1_tensor;
+    torch::Tensor recv_g1_tensor;
+    FPTYPE* recv_g1;
+    FPTYPE* send_g1;
+    if (nswap != 0) {
+      send_g1_tensor = d_local_g1_tensor;
+      int max_recvnum = sendnum_tensor.max().item<int>();
+      auto options = torch::TensorOptions()
+                         .dtype(d_local_g1_tensor.dtype())
+                         .device(d_local_g1_tensor.device());
+      recv_g1_tensor = torch::empty({max_recvnum, tensor_size}, options);
+      recv_g1 = recv_g1_tensor.data_ptr<FPTYPE>();
+      send_g1 = send_g1_tensor.data_ptr<FPTYPE>() + ntotal * tensor_size;
+    }
 
     int end = ntotal;
     auto int32_options = torch::TensorOptions().dtype(torch::kInt32);
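The same defensive pattern recurs in Border::forward and Border::backward: buffers derived from per-swap send lists are materialized only when there is something to send. A minimal sketch of that guard as a standalone helper (hypothetical name make_send_buffer; it assumes a 2-D feature buffer like the {max_recvnum, tensor_size} tensors above, and the PR keeps this logic inline):

```cpp
#include <torch/torch.h>

// Hypothetical sketch of the nsend guard used in Border::forward:
// build the send buffer only when the swap actually sends atoms, so
// from_blob never wraps an empty (possibly null) send list.
torch::Tensor make_send_buffer(const torch::Tensor& recv_g1_tensor,
                               int* sendlist_iswap, int nsend) {
  if (nsend == 0) {
    // nothing to send: an owned, zero-row buffer stands in
    return torch::empty({0, recv_g1_tensor.size(1)},
                        recv_g1_tensor.options());
  }
  auto int32_options = torch::TensorOptions().dtype(torch::kInt32);
  // zero-copy view of the caller-owned send list, moved to the right device
  torch::Tensor isendlist =
      torch::from_blob(sendlist_iswap, {nsend}, int32_options)
          .to(recv_g1_tensor.device());
  // gather the rows being shipped to the neighboring rank
  return recv_g1_tensor.index_select(0, isendlist);
}
```

Together with the if (mpi_init) guards around MPI_Comm_rank / MPI_Comm_size and the adjusted world_size >= 1 check (an uninitialized world_size of 0 now skips the CUDA-aware-MPI probe), this lets the serial nghost == 0 case pass through border_op without touching MPI state that was never initialized.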