Skip to content

Commit

Permalink
feat: Migrate Pigeon TTL to block height
Browse files Browse the repository at this point in the history
The previous Pigeon keep alive implementation would use a five minute
TTL and compare against the current block time. This would frequently
lead to problems after a chain halt, during which Pigeons would not
be able to refresh their TTL.

This change removes the time based approach and substitutes the
block time with block height, with a default of 185 blocks of
lifetime before a Pigeon is considered stale.
  • Loading branch information
byte-bandit committed Aug 29, 2023
1 parent 4326f4a commit 4f6b21f
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 208 deletions.
11 changes: 6 additions & 5 deletions proto/palomachain/paloma/valset/query.proto
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,9 @@ message QueryGetValidatorAliveUntilRequest {
}

message QueryGetValidatorAliveUntilResponse {
google.protobuf.Timestamp aliveUntil = 1
[(gogoproto.nullable) = false, (gogoproto.stdtime) = true];
reserved 1; // deprecated

int64 aliveUntilBlockHeight = 2;
}

message QueryGetValidatorJailReasonRequest {
Expand All @@ -102,10 +103,10 @@ message QueryGetAlivePigeonsResponse {
message ValidatorAlive {
bytes valAddress = 1 [(gogoproto.casttype) =
"github.com/cosmos/cosmos-sdk/types.ValAddress"];
google.protobuf.Timestamp aliveUntil = 2
[(gogoproto.nullable) = false, (gogoproto.stdtime) = true];
int64 ttl = 3;
reserved 2; // deprecated
reserved 3; // deprecated
string error = 4;
int64 aliveUntilBlockHeight = 5;
}

repeated ValidatorAlive aliveValidators = 1;
Expand Down
8 changes: 4 additions & 4 deletions x/evm/types/tx.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions x/valset/keeper/grpc_query_get_alive_pigeons.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package keeper

import (
"context"
"time"

sdk "github.com/cosmos/cosmos-sdk/types"
stakingtypes "github.com/cosmos/cosmos-sdk/x/staking/types"
Expand All @@ -21,7 +20,6 @@ func (k Keeper) GetAlivePigeons(goCtx context.Context, req *types.QueryGetAliveP

vals := k.GetUnjailedValidators(ctx)

now := time.Now().UTC()
res := slice.Map(vals, func(val stakingtypes.ValidatorI) *types.QueryGetAlivePigeonsResponse_ValidatorAlive {
until, err := k.ValidatorAliveUntil(ctx, val.GetOperator())
s := &types.QueryGetAlivePigeonsResponse_ValidatorAlive{
Expand All @@ -30,8 +28,7 @@ func (k Keeper) GetAlivePigeons(goCtx context.Context, req *types.QueryGetAliveP
if err != nil {
s.Error = err.Error()
} else {
s.AliveUntil = until
s.Ttl = int64(until.Sub(now))
s.AliveUntilBlockHeight = until
}
return s
})
Expand Down
2 changes: 1 addition & 1 deletion x/valset/keeper/grpc_query_get_validator_alive_until.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ func (k Keeper) GetValidatorAliveUntil(goCtx context.Context, req *types.QueryGe
}

return &types.QueryGetValidatorAliveUntilResponse{
AliveUntil: aliveUntil,
AliveUntilBlockHeight: aliveUntil,
}, nil
}
52 changes: 35 additions & 17 deletions x/valset/keeper/keep_alive.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,25 @@ import (
)

const (
defaultKeepAliveDuration = 5 * time.Minute
cValidatorJailedErrorMessage = "validator is jailed"
cValidatorNotBondedErrorMessage = "validator is not bonded"
cJailingImminentThreshold = 2 * time.Minute
cGracePeriodBlockHeight = 10
cUnjailedSnapshotStoreKey = "unjailed-validators-snapshot"
// Deprecated. Remove after https://github.com/VolumeFi/paloma/issues/707 is deployed on all nets
defaultKeepAliveDuration = 5 * time.Minute

Check failure on line 21 in x/valset/keeper/keep_alive.go

View workflow job for this annotation

GitHub Actions / run-tests

const `defaultKeepAliveDuration` is unused (unused)
// Deprecated. Remove after https://github.com/VolumeFi/paloma/issues/707 is deployed on all nets
cJailingImminentThreshold = 2 * time.Minute

Check failure on line 23 in x/valset/keeper/keep_alive.go

View workflow job for this annotation

GitHub Actions / run-tests

const `cJailingImminentThreshold` is unused (unused)

cValidatorJailedErrorMessage = "validator is jailed"
cValidatorNotBondedErrorMessage = "validator is not bonded"
cJailingDefaultKeepAliveBlockHeight = 185 // calculated against current block speed of 1.612 seconds
cJailingImminentThresholdBlockHeight = 60 // publish warning if less than 60 blocks worth of TTL remaining
cJailingGracePeriodBlockHeight = 30 // don't jail a validator during the first 30 blocks after unjailing
cUnjailedSnapshotStoreKey = "unjailed-validators-snapshot"
)

type keepAliveData struct {
ValAddr sdk.ValAddress
ContactedAt time.Time
AliveUntil time.Time
// Deprecated. Remove after https://github.com/VolumeFi/paloma/issues/707 is deployed on all nets
AliveUntil time.Time
AliveUntilBlockHeight int64
}

func (k Keeper) KeepValidatorAlive(ctx sdk.Context, valAddr sdk.ValAddress, pigeonVersion string) error {
Expand All @@ -38,9 +45,9 @@ func (k Keeper) KeepValidatorAlive(ctx sdk.Context, valAddr sdk.ValAddress, pige

store := k.keepAliveStore(ctx)
data := keepAliveData{
ValAddr: valAddr,
ContactedAt: ctx.BlockTime(),
AliveUntil: ctx.BlockTime().Add(defaultKeepAliveDuration),
ValAddr: valAddr,
ContactedAt: ctx.BlockTime(),
AliveUntilBlockHeight: ctx.BlockHeader().Height + cJailingDefaultKeepAliveBlockHeight,
}
bz, err := json.Marshal(data)
if err != nil {
Expand All @@ -55,27 +62,38 @@ func (k Keeper) IsValidatorAlive(ctx sdk.Context, valAddr sdk.ValAddress) (bool,
if err != nil {
return false, err
}
return ctx.BlockTime().Before(aliveUntil), nil

return ctx.BlockHeight() < aliveUntil, nil
}

func (k Keeper) ValidatorAliveUntil(ctx sdk.Context, valAddr sdk.ValAddress) (time.Time, error) {
func (k Keeper) ValidatorAliveUntil(ctx sdk.Context, valAddr sdk.ValAddress) (int64, error) {
store := k.keepAliveStore(ctx)
if !store.Has(valAddr) {
return time.Time{}, ErrValidatorNotInKeepAlive.Format(valAddr)
return 0, ErrValidatorNotInKeepAlive.Format(valAddr)
}

dataBz := store.Get(valAddr)
var data keepAliveData
err := json.Unmarshal(dataBz, &data)
if err != nil {
return time.Time{}, err
return 0, err
}

// hack hack hack
// To ensure a smooth migration to block based Pigeon TTL, we're going to pretend we received a valid
// future block height as long as the AliveUntil time is still valid.
// remove this once https://github.com/VolumeFi/paloma/issues/709 has been deployed to all nets
if data.AliveUntilBlockHeight == 0 {
if ctx.BlockTime().Before(data.AliveUntil) {
data.AliveUntilBlockHeight = ctx.BlockHeight() + cJailingDefaultKeepAliveBlockHeight
}
}

if data.AliveUntil.UTC().Sub(ctx.BlockTime().UTC()) < cJailingImminentThreshold {
if data.AliveUntilBlockHeight-ctx.BlockHeight() <= cJailingImminentThresholdBlockHeight {
k.Logger(ctx).Info("Validator TTL is about to run out. Jailing is imminent.", "validator-address", data.ValAddr)
}

return data.AliveUntil, nil
return data.AliveUntilBlockHeight, nil
}

func (k Keeper) CanAcceptKeepAlive(ctx sdk.Context, valAddr sdk.ValAddress, pigeonVersion string) error {
Expand Down Expand Up @@ -180,7 +198,7 @@ func (k Keeper) JailInactiveValidators(ctx sdk.Context) error {
func (k Keeper) isValidatorInGracePeriod(ctx sdk.Context, valAddr sdk.ValAddress) bool {
store := k.gracePeriodStore(ctx)
bytes := store.Get(valAddr)
return libvalid.NotNil(bytes) && ctx.BlockHeight()-int64(sdk.BigEndianToUint64(bytes)) <= cGracePeriodBlockHeight
return libvalid.NotNil(bytes) && ctx.BlockHeight()-int64(sdk.BigEndianToUint64(bytes)) <= cJailingGracePeriodBlockHeight
}

func (k Keeper) keepAliveStore(ctx sdk.Context) sdk.KVStore {
Expand Down
7 changes: 3 additions & 4 deletions x/valset/keeper/keep_alive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"fmt"
"testing"
"time"

sdk "github.com/cosmos/cosmos-sdk/types"
stakingtypes "github.com/cosmos/cosmos-sdk/x/staking/types"
Expand All @@ -15,7 +14,7 @@ import (

func TestJailingInactiveValidators(t *testing.T) {
k, ms, ctx := newValsetKeeper(t)
ctx = ctx.WithBlockTime(time.Unix(1000000000, 0)).WithBlockHeight(100)
ctx = ctx.WithBlockHeight(1000)

valBuild := func(id int, toBeJailed bool) (*mocks.StakingValidatorI, sdk.ValAddress) {
val := sdk.ValAddress(fmt.Sprintf("validator_%d", id))
Expand All @@ -30,7 +29,7 @@ func TestJailingInactiveValidators(t *testing.T) {
vali.On("GetConsAddr").Return(consAddr, nil)
ms.StakingKeeper.On("Jail", mock.Anything, consAddr)
} else {
err := k.KeepValidatorAlive(ctx.WithBlockTime(ctx.BlockTime().Add(-defaultKeepAliveDuration/2)), val, "v1.4.0")
err := k.KeepValidatorAlive(ctx.WithBlockHeight(ctx.BlockHeight()-(cJailingDefaultKeepAliveBlockHeight/2)), val, "v1.4.0")
require.NoError(t, err)
}
return vali, val
Expand Down Expand Up @@ -124,7 +123,7 @@ func TestCanAcceptKeepAlive(t *testing.T) {

func TestUpdateGracePeriod(t *testing.T) {
k, ms, ctx := newValsetKeeper(t)
ctx = ctx.WithBlockTime(time.Unix(1000000000, 0)).WithBlockHeight(100)
ctx = ctx.WithBlockHeight(1000)

valBuild := func(id int) (*mocks.StakingValidatorI, sdk.ValAddress) {
val := sdk.ValAddress(fmt.Sprintf("validator_%d", id))
Expand Down
9 changes: 4 additions & 5 deletions x/valset/keeper/keeper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package keeper

import (
"testing"
"time"

"github.com/cometbft/cometbft/libs/log"
tmproto "github.com/cometbft/cometbft/proto/tendermint/types"
Expand Down Expand Up @@ -50,7 +49,7 @@ func TestIfValidatorCanBeAccepted(t *testing.T) {

func TestRegisteringPigeon(t *testing.T) {
k, ms, ctx := newValsetKeeper(t)
ctx = ctx.WithBlockTime(time.Unix(999999999, 0))
ctx = ctx.WithBlockHeight(1000)
val := sdk.ValAddress("validator")
val2 := sdk.ValAddress("validator2")
nonExistingVal := sdk.ValAddress("i dont exist")
Expand All @@ -68,7 +67,7 @@ func TestRegisteringPigeon(t *testing.T) {
ms.StakingKeeper.On("Validator", mock.Anything, nonExistingVal).Return(nil)

t.Run("if validator has been alive before, but it's not now, then it returns an error", func(t *testing.T) {
err := k.KeepValidatorAlive(ctx.WithBlockTime(time.Unix(555, 0)), val, "v1.4.0")
err := k.KeepValidatorAlive(ctx.WithBlockHeight(500), val, "v1.4.0")
require.NoError(t, err)
alive, err := k.IsValidatorAlive(ctx, val)
require.NoError(t, err)
Expand Down Expand Up @@ -533,7 +532,7 @@ func TestGracePeriodCoverage(t *testing.T) {
ctx = ctx.WithBlockHeight(100)

t.Run("with unjailed validator covered by grace period", func(t *testing.T) {
for i := cGracePeriodBlockHeight; i >= 0; i-- {
for i := cJailingGracePeriodBlockHeight; i >= 0; i-- {
val := sdk.ValAddress("validator")
bh := sdk.Uint64ToBigEndian(uint64(ctx.BlockHeight() - int64(i)))
k.gracePeriodStore(ctx).Set(val, bh)
Expand All @@ -543,7 +542,7 @@ func TestGracePeriodCoverage(t *testing.T) {

t.Run("with unjailed validator no longer covered by grace period", func(t *testing.T) {
val := sdk.ValAddress("validator")
bh := sdk.Uint64ToBigEndian(uint64(ctx.BlockHeight() - cGracePeriodBlockHeight - 1))
bh := sdk.Uint64ToBigEndian(uint64(ctx.BlockHeight() - cJailingGracePeriodBlockHeight - 1))
k.gracePeriodStore(ctx).Set(val, bh)
require.False(t, k.isValidatorInGracePeriod(ctx, val), "bh = %d", bh)
})
Expand Down
Loading

0 comments on commit 4f6b21f

Please sign in to comment.