diff --git a/libstuff/SSignal.cpp b/libstuff/SSignal.cpp index d623a9ad7..e4f2b86af 100644 --- a/libstuff/SSignal.cpp +++ b/libstuff/SSignal.cpp @@ -1,5 +1,5 @@ #include "libstuff.h" - +#include #include #include #include @@ -202,12 +202,19 @@ void _SSignal_StackTrace(int signum, siginfo_t *info, void *ucontext) { SWARN("Calling DIE function."); SSignalHandlerDieFunc(); SSignalHandlerDieFunc = [](){}; - SWARN("DIE function returned, aborting (if not done)."); + SWARN("DIE function returned."); + /* Runs after the DIE callback has returned, and only when a node has been registered in KILLABLE_SQLITE_NODE. NOTE(review): this executes inside the signal path of _SSignal_StackTrace; confirm that kill() and its logging are safe to call from here. */ if (SQLiteNode::KILLABLE_SQLITE_NODE) { + SWARN("Killing peer connections."); + SQLiteNode::KILLABLE_SQLITE_NODE->kill(); + } } // If we weren't already in ABORT, we'll call that. The second call will skip the above callstack generation. if (signum != SIGABRT) { + SWARN("Aborting."); abort(); + } else { + SWARN("Already in ABORT."); } } else { SALERT("Non-signal thread got signal " << strsignal(signum) << "(" << signum << "), which wasn't expected"); diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 4485ce8c1..2a0c7e31b 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -59,6 +59,8 @@ #undef SLOGPREFIX #define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} " +/* Null until a SQLiteNode constructor stores its this pointer here. NOTE(review): no code visible in this change clears it again when that node is destroyed; confirm the node outlives any signal handling. nullptr would be the idiomatic initializer. */ SQLiteNode* SQLiteNode::KILLABLE_SQLITE_NODE{0}; + // Initializations for static vars.
const uint64_t SQLiteNode::RECV_TIMEOUT{STIME_US_PER_S * 30}; @@ -145,6 +147,7 @@ SQLiteNode::SQLiteNode(SQLiteServer& server, shared_ptr dbPool, cons _stateTimeout(STimeNow() + firstTimeout), _syncPeer(nullptr) { + /* Register this instance as the node the signal handling code may kill. A plain assignment: the most recently constructed node replaces any earlier registration. */ KILLABLE_SQLITE_NODE = this; SASSERT(_originalPriority >= 0); onPrepareHandlerEnabled = false; @@ -2716,3 +2719,10 @@ SQLiteNodeState SQLiteNode::stateFromName(const string& name) { return it->second; } } + /* Walks _peerList, logs a warning naming each peer, and calls reset() on it. Reached from the signal handling code through KILLABLE_SQLITE_NODE. NOTE(review): no locking is visible here; confirm _peerList cannot change while this runs. */ +void SQLiteNode::kill() { + for (SQLitePeer* peer : _peerList) { + SWARN("Killing peer: " << peer->name); + peer->reset(); + } +} diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 7b2f3ba0d..1244f0cfa 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -76,6 +76,10 @@ class SQLiteNode : public STCPManager { NUM_CONSISTENCY_LEVELS }; + // This is a globally accessible pointer to some node instance. The intention here is to let signal handling code attempt to kill outstanding + // peer connections on this node before shutting down. + static SQLiteNode* KILLABLE_SQLITE_NODE; + // Receive timeout for cluster messages. static const uint64_t RECV_TIMEOUT; @@ -152,6 +156,9 @@ class SQLiteNode : public STCPManager { // Call this if you want to shut down the node. void beginShutdown(); + // Kill all peer connections on this node by calling reset() on every peer in _peerList. + void kill(); + // Handle any read/write events that occurred. void postPoll(fd_map& fdm, uint64_t& nextActivity); diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index 624725b74..94f4eb5bd 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -72,8 +72,7 @@ SQLitePeer::PeerPostPollStatus SQLitePeer::postPoll(fd_map& fdm, uint64_t& nextA switch (socket->state.load()) { case STCPManager::Socket::CONNECTED: { // socket->lastRecvTime is always set, it's initialized to STimeNow() at creation.
- auto lastActivityTime = max(socket->lastSendTime, socket->lastRecvTime); - if (lastActivityTime + SQLiteNode::RECV_TIMEOUT < STimeNow()) { + /* The timeout is measured from the last receive only: sending to a peer that never answers does not postpone it. */ if (socket->lastRecvTime + SQLiteNode::RECV_TIMEOUT < STimeNow()) { SHMMM("Connection with peer '" << name << "' timed out."); return PeerPostPollStatus::SOCKET_ERROR; }