From 6584eace2ac339947c9fa35f6f23b635b3118f22 Mon Sep 17 00:00:00 2001 From: Scott Hemmert Date: Mon, 7 Oct 2024 10:15:37 -0600 Subject: [PATCH] Bug fixes for parallel checkpointing. - Fixes #1156 - Initial Sync interval needed to take Checkpoint time into account. - Fixes #1157 - Moved barrier for Exit restart to the proper place. --- src/sst/core/simulation.cc | 2 +- src/sst/core/sync/syncManager.cc | 5 ++++- src/sst/core/testElements/coreTest_Component.h | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/sst/core/simulation.cc b/src/sst/core/simulation.cc index 3a7b793d0..618fc56da 100644 --- a/src/sst/core/simulation.cc +++ b/src/sst/core/simulation.cc @@ -1616,7 +1616,6 @@ Simulation_impl::checkpoint(const std::string& checkpoint_filename) // Actions that may also be in TV ser& real_time_; if ( my_rank.thread == 0 ) { ser& m_exit; } - initBarrier.wait(); ser& m_heartbeat; // Add shared StatisticOutput vector @@ -1751,6 +1750,7 @@ Simulation_impl::restart(Config* cfg) // Actions that may also be in TV ser& real_time_; if ( my_rank.thread == 0 ) { ser& m_exit; } + initBarrier.wait(); // Create new checkpoint object. Needs to be done before SyncManager is reinitialized if ( cfg->checkpoint_sim_period() != "" ) { diff --git a/src/sst/core/sync/syncManager.cc b/src/sst/core/sync/syncManager.cc index 0975beb04..9c1e7dcbb 100644 --- a/src/sst/core/sync/syncManager.cc +++ b/src/sst/core/sync/syncManager.cc @@ -495,7 +495,10 @@ SyncManager::finalizeLinkConfigurations() // Need to figure out what sync comes first and insert object into // TimeVortex if ( num_ranks_.rank == 1 && num_ranks_.thread == 1 ) return; - computeNextInsert(); + if ( checkpoint_ ) + computeNextInsert(checkpoint_->getNextCheckpointSimTime()); + else + computeNextInsert(); } /** Prepare for complete() phase */ diff --git a/src/sst/core/testElements/coreTest_Component.h b/src/sst/core/testElements/coreTest_Component.h index 62dcdd106..20fd95764 100644 --- a/src/sst/core/testElements/coreTest_Component.h +++ b/src/sst/core/testElements/coreTest_Component.h @@ -116,8 +116,8 @@ class coreTestComponent : public coreTestComponentBase2 coreTestComponent(SST::ComponentId_t id, SST::Params& params); ~coreTestComponent(); - void setup() {} - void finish() { printf("Component Finished.\n"); } + void setup() override {} + void finish() override { printf("Component Finished.\n"); } void serialize_order(SST::Core::Serialization::serializer& ser) override; ImplementSerializable(SST::CoreTestComponent::coreTestComponent)