Skip to content

Commit

Permalink
#813 eth_sync adding api function
Browse files Browse the repository at this point in the history
  • Loading branch information
kladkogex committed Dec 1, 2023
1 parent 46a4106 commit dd18d4b
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 36 deletions.
63 changes: 29 additions & 34 deletions monitoring/StuckDetectionAgent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,56 +69,51 @@ void StuckDetectionAgent::StuckDetectionLoop( StuckDetectionAgent* _agent ) {

LOG( info, "StuckDetection agent: started monitoring." );

// determine if this is the first restart, or if there were restarts
// before
auto numberOfPreviousRestarts = _agent->getNumberOfPreviousRestarts();

uint64_t restartIteration = 1;

while ( true ) {
if ( _agent->getSchain()->getNode()->isExitRequested() )
return;
auto restartFileName = _agent->createStuckFileName( restartIteration );

if ( !boost::filesystem::exists( restartFileName ) ) {
break;
}
restartIteration++;
if ( numberOfPreviousRestarts > 0 ) {
LOG( info, "Stuck detection engine: previous restarts detected:" << numberOfPreviousRestarts );
}

if ( restartIteration > 1 ) {
LOG( info, "Stuck detection engine: previous restarts detected:" << to_string(
restartIteration - 1 ) );
}


if ( _agent->getSchain()->getNode()->isExitRequested() )
return;
uint64_t restartIteration = numberOfPreviousRestarts + 1;
uint64_t whenToRestart = 0;

uint64_t restartTime = 0;
uint64_t sleepTime = _agent->getSchain()->getNode()->getStuckMonitoringIntervalMs() * 1000;

while ( restartTime == 0 ) {
if ( _agent->getSchain()->getNode()->isExitRequested() )
return;
// loop until stuck is detected
do {
try {
usleep( sleepTime );
_agent->getSchain()->getNode()->exitCheck();
restartTime = _agent->checkForRestart( restartIteration );
usleep(_agent->getSchain()->getNode()->getStuckMonitoringIntervalMs() * 1000);
// this will return non-zero if skaled needs to be restarted
whenToRestart = _agent->doStuckCheck(restartIteration);
} catch ( ExitRequestedException& ) {
return;
} catch ( exception& e ) {
SkaleException::logNested( e );
}
}

} while (whenToRestart == 0 );

CHECK_STATE( restartTime > 0 );
// Stuck detection loop detected stuck. Restart.
try {
LOG( info, "Stuck detection engine: restarting skaled because of stuck detected." );
_agent->restart( restartTime, restartIteration );
_agent->restart(whenToRestart, restartIteration );
} catch ( ExitRequestedException& ) {
return;
}
}

uint64_t StuckDetectionAgent::getNumberOfPreviousRestarts() {
    // Every restart leaves behind a marker file whose name is derived from
    // the restart iteration number (iterations are numbered from 1).
    // Probe iteration 1, 2, 3, ... until the first marker file that does
    // not exist; the number of restarts that already happened is the last
    // iteration whose file was found.
    uint64_t iteration = 1;
    while ( boost::filesystem::exists( restartFileName( iteration ) ) ) {
        ++iteration;
    }
    // "iteration" now names the first missing file, so one less than that
    // is the count of existing restart files (0 if none were found).
    return iteration - 1;
}

void StuckDetectionAgent::join() {
CHECK_STATE( stuckDetectionThreadPool );
stuckDetectionThreadPool->joinAll();
Expand Down Expand Up @@ -174,7 +169,7 @@ bool StuckDetectionAgent::stuckCheck( uint64_t _restartIntervalMs, uint64_t _tim
return result;
}

uint64_t StuckDetectionAgent::checkForRestart( uint64_t _restartIteration ) {
uint64_t StuckDetectionAgent::doStuckCheck(uint64_t _restartIteration ) {
CHECK_STATE( _restartIteration >= 1 );

auto baseRestartIntervalMs = getSchain()->getNode()->getStuckRestartIntervalMs();
Expand Down Expand Up @@ -229,7 +224,7 @@ void StuckDetectionAgent::restart( uint64_t _restartTimeMs, uint64_t _iteration
exit( 13 );
}

string StuckDetectionAgent::createStuckFileName( uint64_t _iteration ) {
string StuckDetectionAgent::restartFileName(uint64_t _iteration ) {
CHECK_STATE( _iteration >= 1 );
auto engine = getNode()->getConsensusEngine();
CHECK_STATE( engine );
Expand All @@ -242,7 +237,7 @@ string StuckDetectionAgent::createStuckFileName( uint64_t _iteration ) {

void StuckDetectionAgent::createStuckRestartFile( uint64_t _iteration ) {
CHECK_STATE( _iteration >= 1 );
auto fileName = createStuckFileName( _iteration );
auto fileName = restartFileName(_iteration);

ofstream f;
f.open( fileName, ios::trunc );
Expand Down
6 changes: 4 additions & 2 deletions monitoring/StuckDetectionAgent.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,19 @@ class StuckDetectionAgent : public Agent {

void join();

uint64_t checkForRestart( uint64_t _restartIteration );
uint64_t doStuckCheck(uint64_t _restartIteration );

void restart( uint64_t _baseRestartTimeMs, uint64_t _iteration );

void createStuckRestartFile( uint64_t _iteration );

void cleanupState();

string createStuckFileName( uint64_t _iteration );
string restartFileName(uint64_t _iteration );

bool checkNodesAreOnline();

bool stuckCheck( uint64_t _restartIntervalMs, uint64_t _timeStamp );

uint64_t getNumberOfPreviousRestarts();
};

0 comments on commit dd18d4b

Please sign in to comment.