From b9e4defe22894118fd5758412f4d25bcc16a99e8 Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Wed, 25 Dec 2024 17:30:47 +0300 Subject: [PATCH] context: add simple rtt estimator After b3c1c3b1a14f2070fed481d077a6daada6382b3e the only source of block time inaccuracy left is PrepareRequest propagation. In general it's undetermined and can vary from node to node and from round to round. But in practice it's relatively stable and can be averaged out over some number of samples. While backup node can't know when request was sent to it, it can measure what happens to its request when it's primary since healthy nodes are expected to respond immediately making request/response pretty similar to ping/pong roundtrip. This data also can't be perfect, messages can travel different ways from A to B and B to A on a P2P network, but in practice it's good enough and it's much better than having no data at all. Notice that this timer adjustment scheme uses local timer only, we trust our local clock inevitably and we don't need to mess with any view of time that other nodes have (including the one from the block header). It can be attacked by malicious peer or node delaying messages, but as we expect 2F+1 nodes to be correct one this is enough to collect proper number of responses and move on, seriously delayed response will be outdated by the time it comes then. And if any intermediary can delay messages to all nodes for arbitrary time on our P2P network we can't have any reliable timing anyway. To me it fixes as much of https://github.com/neo-project/neo/issues/2918 as only possible. Signed-off-by: Roman Khimov --- context.go | 4 ++++ dbft.go | 5 +++++ rtt.go | 26 ++++++++++++++++++++++++++ send.go | 2 ++ 4 files changed, 37 insertions(+) create mode 100644 rtt.go diff --git a/context.go b/context.go index 25ee4870..6d43e583 100644 --- a/context.go +++ b/context.go @@ -91,6 +91,9 @@ type Context[H Hash] struct { lastBlockTime time.Time // Wall clock time of when we started (as in PrepareRequest) creating the last block (used for timer adjustments). lastBlockIndex uint32 lastBlockView byte + + prepareSentTime time.Time + rttEstimates rtt } // N returns total number of validators. @@ -236,6 +239,7 @@ func (c *Context[H]) PreBlock() PreBlock[H] { func (c *Context[H]) reset(view byte, ts uint64) { c.MyIndex = -1 + c.prepareSentTime = time.Time{} c.lastBlockTimestamp = ts if view == 0 { diff --git a/dbft.go b/dbft.go index 9a1b0748..126954a2 100644 --- a/dbft.go +++ b/dbft.go @@ -153,6 +153,7 @@ func (d *DBFT[H]) initializeConsensus(view byte, ts uint64) { var ts = d.Timer.Now() var diff = ts.Sub(d.lastBlockTime) timeout -= diff + timeout -= d.rttEstimates.avg / 2 timeout = max(0, timeout) } d.changeTimer(timeout) @@ -482,6 +483,10 @@ func (d *DBFT[H]) onPrepareResponse(msg ConsensusPayload[H]) { } } + if d.IsPrimary() && !d.prepareSentTime.IsZero() && !d.recovering { + d.rttEstimates.addTime(time.Since(d.prepareSentTime)) + } + d.extendTimer(2) if !d.Context.WatchOnly() && !d.CommitSent() && (!d.isAntiMEVExtensionEnabled() || !d.PreCommitSent()) && d.RequestSentOrReceived() { diff --git a/rtt.go b/rtt.go new file mode 100644 index 00000000..35110e4e --- /dev/null +++ b/rtt.go @@ -0,0 +1,26 @@ +package dbft + +import ( + "time" +) + +const rttLength = 7 * 10 // 10 rounds with 7 nodes + +type rtt struct { + times [rttLength]time.Duration + idx int + avg time.Duration +} + +func (r *rtt) addTime(new time.Duration) { + var old = r.times[r.idx] + + if old != 0 { + new = min(new, 2*old) // Too long delays should be normalized, we don't want to overshoot. + } + + r.avg = r.avg + (new-old)/time.Duration(len(r.times)) + r.avg = max(0, r.avg) // Can't be less than zero. + r.times[r.idx] = new + r.idx = (r.idx + 1) % len(r.times) +} diff --git a/send.go b/send.go index cc9c5082..70c12e4c 100644 --- a/send.go +++ b/send.go @@ -27,6 +27,8 @@ func (d *DBFT[H]) sendPrepareRequest() { d.PreparationPayloads[d.MyIndex] = msg d.broadcast(msg) + d.prepareSentTime = d.Timer.Now() + delay := d.SecondsPerBlock << (d.ViewNumber + 1) if d.ViewNumber == 0 { delay -= d.SecondsPerBlock