From 2b1cd95f689f779a1c18b91db0cb87a6ef1cef43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Wed, 28 Feb 2024 23:10:01 +0000 Subject: [PATCH] vochain/state: fix a data race in UpdateSIKRoots UpdateSIKRoots uses State.NoState with withTxLock=false, as it grabs the State.tx lock itself, so NoState grabbing the same lock for reads and writes would cause deadlocks. However, UpdateSIKRoots only grabbed a read lock, and towards the end of its body it made a call to NoState.Set, which is a write operation that needs the write lock to be held. First, fix UpdateSIKRoots so it holds a write lock. Second, tweak NoState so that it loudly complains as soon as it can if it spots that reads or writes are happening without the mutex being properly locked at the right level. Note that for writes we only do this on the first call to Set or Delete, as some NoState callers only ever call read-only methods like Get. Hopefully these panic assertions will mean we catch future races much sooner; at least, without the UpdateSIKRoots fix, the new assertions make multiple of our tests panic every time. Fixes #1209. --- vochain/state/nostate.go | 15 +++++++++++++++ vochain/state/sik.go | 8 +++++--- vochain/state/state.go | 2 ++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/vochain/state/nostate.go b/vochain/state/nostate.go index 96592d964..ff05fc924 100644 --- a/vochain/state/nostate.go +++ b/vochain/state/nostate.go @@ -14,6 +14,13 @@ import "go.vocdoni.io/dvote/db" // The NoState transaction is committed or discarted with the state // transaction at Save() or Rollback(). func (s *State) NoState(withTxLock bool) *NoState { + if !withTxLock && s.tx.TryLock() { + // withTxLock only exists so it can be set to false when the caller + // already holds the State.tx lock, to avoid deadlocks. + // If the lock is not held in any way and withTxLock is false, + // it is a race to read from or write to the state. + panic("State.NoState with withTxLock=false can only be called when State.tx is read or write locked") + } return &NoState{ state: s, withTxLock: withTxLock, @@ -31,6 +38,10 @@ func (ns *NoState) Set(key, value []byte) error { if ns.withTxLock { ns.state.tx.Lock() defer ns.state.tx.Unlock() + } else if ns.state.tx.TryRLock() { + // Like the check in the constructor, but to check if NoState is used for writes + // when the State.tx lock is only held for reads. + panic("State.NoState with withTxLock=false can only use writes when State.tx is write locked") } return ns.state.store.NoStateWriteTx.Set(key, value) } @@ -64,6 +75,10 @@ func (ns *NoState) Delete(key []byte) error { if ns.withTxLock { ns.state.tx.Lock() defer ns.state.tx.Unlock() + } else if ns.state.tx.TryRLock() { + // Like the check in the constructor, but to check if NoState is used for writes + // when the State.tx lock is only held for reads. + panic("State.NoState with withTxLock=false can only use writes when State.tx is write locked") } return ns.state.store.NoStateWriteTx.Delete(key) } diff --git a/vochain/state/sik.go b/vochain/state/sik.go index 9e1b0e3e3..63bb4e60e 100644 --- a/vochain/state/sik.go +++ b/vochain/state/sik.go @@ -188,12 +188,14 @@ func (v *State) UpdateSIKRoots() error { // network height. v.mtxValidSIKRoots.Lock() defer v.mtxValidSIKRoots.Unlock() - sikNoStateDB := v.NoState(false) currentBlock := v.CurrentHeight() + // Note that later on we call sikNoStateDB.Set, so we must grab a write lock. + v.tx.Lock() + defer v.tx.Unlock() + sikNoStateDB := v.NoState(false) + // get sik roots key-value database associated to the siks tree - v.tx.RLock() - defer v.tx.RUnlock() siksTree, err := v.tx.DeepSubTree(StateTreeCfg(TreeSIK)) if err != nil { return fmt.Errorf("%w: %w", ErrSIKSubTree, err) diff --git a/vochain/state/state.go b/vochain/state/state.go index 488f60cde..be0224f22 100644 --- a/vochain/state/state.go +++ b/vochain/state/state.go @@ -337,6 +337,8 @@ func (v *State) Save() ([]byte, error) { if err := v.UpdateSIKRoots(); err != nil { return nil, fmt.Errorf("cannot update SIK roots: %w", err) } + // TODO: UpdateSIKRoots holds a write lock during its run, + // and we grab another one here immediately after. Join them? v.tx.Lock() defer v.tx.Unlock() err := func() error {