Skip to content

Commit

Permalink
test: tss migration e2e test (#2440)
Browse files Browse the repository at this point in the history
* add new runner for migration tests

* skip adding new keygen

* skip adding new keygen

* modify btc runner

* tss fund migrate test for eth

* add queries for tss funds migrator info

* add changelog

* fix lint messages

* refactor keygen

* refactor keygen

* rearrange keygen creations

* rearrange keygen creations

* modify accounting

* rebase develop

* add retest after migration

* add checks for error messages

* add unit test for CheckMigration

* fix unit tests for update msgUpdateTSS

* fix some lint issues and rename all runners to r

* fix some lint issues and rename all runners to r

* add changelog

* fix lint errors

* fix lint errors

* removed debug logs

* add comments for zetasupervisor lib

* add new user for migration tests

* add new user for migration tests

* change name to start-tss-migration-test

* add changes for comments 1

* add changes for comments 1

* rename to CheckIfMigrationDeposit

* rename to CheckIfMigrationDeposit

* use cctx_Gateway to check cctx

* add Tss migration to e2e.yml

* move to a sub function

* resolve comments 2

* resolve comments 3

* add todo

* resolve comments 4

* Update .github/workflows/e2e.yml

Co-authored-by: Francisco de Borja Aranda Castillejo <[email protected]>

---------

Co-authored-by: Francisco de Borja Aranda Castillejo <[email protected]>
  • Loading branch information
kingpinXD and fbac authored Jul 17, 2024
1 parent abc15c8 commit 8e1cfa7
Show file tree
Hide file tree
Showing 40 changed files with 1,631 additions and 203 deletions.
14 changes: 12 additions & 2 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ on:
type: boolean
required: false
default: false
tss-migration-test:
type: boolean
required: false
default: false

concurrency:
group: e2e-${{ github.head_ref || github.sha }}
Expand All @@ -61,13 +65,14 @@ jobs:
ADMIN_TESTS: ${{ steps.matrix-conditionals.outputs.ADMIN_TESTS }}
PERFORMANCE_TESTS: ${{ steps.matrix-conditionals.outputs.PERFORMANCE_TESTS }}
STATEFUL_DATA_TESTS: ${{ steps.matrix-conditionals.outputs.STATEFUL_DATA_TESTS }}
TSS_MIGRATION_TESTS: ${{ steps.matrix-conditionals.outputs.TSS_MIGRATION_TESTS }}

steps:
# use api rather than event context to avoid race conditions (label added after push)
- id: matrix-conditionals
uses: actions/github-script@v7
with:
script: |
console.log(context);
if (context.eventName === 'pull_request') {
const { data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
Expand All @@ -83,6 +88,7 @@ jobs:
core.setOutput('ADMIN_TESTS', labels.includes('ADMIN_TESTS'));
core.setOutput('PERFORMANCE_TESTS', labels.includes('PERFORMANCE_TESTS'));
core.setOutput('STATEFUL_DATA_TESTS', labels.includes('STATEFUL_DATA_TESTS'));
core.setOutput('TSS_MIGRATION_TESTS', labels.includes('TSS_MIGRATION_TESTS'));
} else if (context.eventName === 'merge_group') {
core.setOutput('DEFAULT_TESTS', true);
} else if (context.eventName === 'push' && context.ref === 'refs/heads/develop') {
Expand Down Expand Up @@ -111,6 +117,7 @@ jobs:
core.setOutput('ADMIN_TESTS', context.payload.inputs['admin-test']);
core.setOutput('PERFORMANCE_TESTS', context.payload.inputs['performance-test']);
core.setOutput('STATEFUL_DATA_TESTS', context.payload.inputs['stateful-data-test']);
core.setOutput('TSS_MIGRATION_TESTS', context.payload.inputs['tss-migration-test']);
}
e2e:
Expand Down Expand Up @@ -140,6 +147,9 @@ jobs:
- make-target: "start-e2e-import-mainnet-test"
runs-on: buildjet-16vcpu-ubuntu-2204
run: ${{ needs.matrix-conditionals.outputs.STATEFUL_DATA_TESTS == 'true' }}
- make-target: "start-tss-migration-test"
runs-on: ubuntu-20.04
run: ${{ needs.matrix-conditionals.outputs.TSS_MIGRATION_TESTS == 'true' }}
name: ${{ matrix.make-target }}
uses: ./.github/workflows/reusable-e2e.yml
with:
Expand All @@ -150,7 +160,7 @@ jobs:
# this allows you to set a required status check
e2e-ok:
runs-on: ubuntu-22.04
needs:
needs:
- matrix-conditionals
- e2e
if: always()
Expand Down
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ build-testnet-ubuntu: go.sum
docker rm temp-container

install: go.sum
@echo "--> Installing zetacored & zetaclientd"
@echo "--> Installing zetacored, zetaclientd, and zetaclientd-supervisor"
@go install -mod=readonly $(BUILD_FLAGS) ./cmd/zetacored
@go install -mod=readonly $(BUILD_FLAGS) ./cmd/zetaclientd
@go install -mod=readonly $(BUILD_FLAGS) ./cmd/zetaclientd-supervisor
Expand Down Expand Up @@ -254,6 +254,11 @@ start-stress-test: zetanode
@echo "--> Starting stress test"
cd contrib/localnet/ && $(DOCKER) compose --profile stress -f docker-compose.yml up -d

start-tss-migration-test: zetanode
@echo "--> Starting migration test"
export E2E_ARGS="--test-tss-migration" && \
cd contrib/localnet/ && $(DOCKER) compose up -d

###############################################################################
### Upgrade Tests ###
###############################################################################
Expand Down
2 changes: 2 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@
* [2369](https://github.com/zeta-chain/node/pull/2369) - fix random cross-chain swap failure caused by using tiny UTXO
* [2549](https://github.com/zeta-chain/node/pull/2459) - add separate accounts for each policy in e2e tests
* [2415](https://github.com/zeta-chain/node/pull/2415) - add e2e test for upgrade and test admin functionalities
* [2440](https://github.com/zeta-chain/node/pull/2440) - Add e2e test for TSS migration


### Fixes

Expand Down
129 changes: 128 additions & 1 deletion cmd/zetaclientd-supervisor/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"runtime"
"strings"
"sync"
"syscall"
"time"

"github.com/cosmos/cosmos-sdk/client/grpc/tmservice"
Expand All @@ -20,6 +21,7 @@ import (
"github.com/rs/zerolog"
"google.golang.org/grpc"

observertypes "github.com/zeta-chain/zetacore/x/observer/types"
"github.com/zeta-chain/zetacore/zetaclient/config"
)

Expand Down Expand Up @@ -66,6 +68,7 @@ type zetaclientdSupervisor struct {
upgradesDir string
upgradePlanName string
enableAutoDownload bool
restartChan chan os.Signal
}

func newZetaclientdSupervisor(
Expand All @@ -81,19 +84,24 @@ func newZetaclientdSupervisor(
if err != nil {
return nil, fmt.Errorf("grpc dial: %w", err)
}

// these signals will result in the supervisor process only restarting zetaclientd
restartChan := make(chan os.Signal, 1)
return &zetaclientdSupervisor{
zetacoredConn: conn,
logger: logger,
reloadSignals: make(chan bool, 1),
upgradesDir: defaultUpgradesDir,
enableAutoDownload: enableAutoDownload,
restartChan: restartChan,
}, nil
}

func (s *zetaclientdSupervisor) Start(ctx context.Context) {
go s.watchForVersionChanges(ctx)
go s.handleCoreUpgradePlan(ctx)
go s.handleNewKeygen(ctx)
go s.handleNewTSSKeyGeneration(ctx)
go s.handleTSSUpdate(ctx)
}

func (s *zetaclientdSupervisor) WaitForReloadSignal(ctx context.Context) {
Expand Down Expand Up @@ -169,6 +177,125 @@ func (s *zetaclientdSupervisor) watchForVersionChanges(ctx context.Context) {
}
}

func (s *zetaclientdSupervisor) handleTSSUpdate(ctx context.Context) {
maxRetries := 11
retryInterval := 5 * time.Second

// TODO : use retry library under pkg/retry
// https://github.com/zeta-chain/node/issues/2492
for i := 0; i < maxRetries; i++ {
client := observertypes.NewQueryClient(s.zetacoredConn)
tss, err := client.TSS(ctx, &observertypes.QueryGetTSSRequest{})
if err != nil {
s.logger.Warn().Err(err).Msg("unable to get original tss")
time.Sleep(retryInterval)
continue
}
i = 0
for {
select {
case <-time.After(time.Second):
case <-ctx.Done():
return
}
tssNew, err := client.TSS(ctx, &observertypes.QueryGetTSSRequest{})
if err != nil {
s.logger.Warn().Err(err).Msg("unable to get tss")
continue
}

if tssNew.TSS.TssPubkey == tss.TSS.TssPubkey {
continue
}

tss = tssNew
s.logger.Info().
Msgf("tss address is updated from %s to %s", tss.TSS.TssPubkey, tssNew.TSS.TssPubkey)
time.Sleep(6 * time.Second)
s.logger.Info().Msg("restarting zetaclientd to update tss address")
s.restartChan <- syscall.SIGHUP
}
}
s.logger.Warn().Msg("handleTSSUpdate exiting without success")
}

func (s *zetaclientdSupervisor) handleNewTSSKeyGeneration(ctx context.Context) {
maxRetries := 11
retryInterval := 5 * time.Second

// TODO : use retry library under pkg/retry
for i := 0; i < maxRetries; i++ {
client := observertypes.NewQueryClient(s.zetacoredConn)
alltss, err := client.TssHistory(ctx, &observertypes.QueryTssHistoryRequest{})
if err != nil {
s.logger.Warn().Err(err).Msg("unable to get tss original history")
time.Sleep(retryInterval)
continue
}
i = 0
tssLenCurrent := len(alltss.TssList)
for {
select {
case <-time.After(time.Second):
case <-ctx.Done():
return
}
tssListNew, err := client.TssHistory(ctx, &observertypes.QueryTssHistoryRequest{})
if err != nil {
s.logger.Warn().Err(err).Msg("unable to get tss new history")
continue
}
tssLenUpdated := len(tssListNew.TssList)

if tssLenUpdated == tssLenCurrent {
continue
}
if tssLenUpdated < tssLenCurrent {
tssLenCurrent = len(tssListNew.TssList)
continue
}

tssLenCurrent = tssLenUpdated
s.logger.Info().Msgf("tss list updated from %d to %d", tssLenCurrent, tssLenUpdated)
time.Sleep(5 * time.Second)
s.logger.Info().Msg("restarting zetaclientd to update tss list")
s.restartChan <- syscall.SIGHUP
}
}
s.logger.Warn().Msg("handleNewTSSKeyGeneration exiting without success")
}

func (s *zetaclientdSupervisor) handleNewKeygen(ctx context.Context) {
client := observertypes.NewQueryClient(s.zetacoredConn)
prevKeygenBlock := int64(0)
for {
select {
case <-time.After(time.Second):
case <-ctx.Done():
return
}
resp, err := client.Keygen(ctx, &observertypes.QueryGetKeygenRequest{})
if err != nil {
s.logger.Warn().Err(err).Msg("unable to get keygen")
continue
}
if resp.Keygen == nil {
s.logger.Warn().Err(err).Msg("keygen is nil")
continue
}

if resp.Keygen.Status != observertypes.KeygenStatus_PendingKeygen {
continue
}
keygenBlock := resp.Keygen.BlockNumber
if prevKeygenBlock == keygenBlock {
continue
}
prevKeygenBlock = keygenBlock
s.logger.Info().Msgf("got new keygen at block %d", keygenBlock)
s.restartChan <- syscall.SIGHUP
}
}
func (s *zetaclientdSupervisor) handleCoreUpgradePlan(ctx context.Context) {
client := upgradetypes.NewQueryClient(s.zetacoredConn)

Expand Down
8 changes: 3 additions & 5 deletions cmd/zetaclientd-supervisor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ func main() {
shutdownChan := make(chan os.Signal, 1)
signal.Notify(shutdownChan, syscall.SIGINT, syscall.SIGTERM)

// these signals will result in the supervisor process only restarting zetaclientd
restartChan := make(chan os.Signal, 1)
signal.Notify(restartChan, syscall.SIGHUP)

hotkeyPassword, tssPassword, err := promptPasswords()
if err != nil {
logger.Error().Err(err).Msg("unable to get passwords")
Expand All @@ -53,6 +49,8 @@ func main() {
os.Exit(1)
}
supervisor.Start(ctx)
// listen for SIGHUP to trigger a restart of zetaclientd
signal.Notify(supervisor.restartChan, syscall.SIGHUP)

shouldRestart := true
for shouldRestart {
Expand Down Expand Up @@ -82,7 +80,7 @@ func main() {
select {
case <-ctx.Done():
return nil
case sig := <-restartChan:
case sig := <-supervisor.restartChan:
logger.Info().Msgf("got signal %d, sending SIGINT to zetaclientd", sig)
case sig := <-shutdownChan:
logger.Info().Msgf("got signal %d, shutting down", sig)
Expand Down
Loading

0 comments on commit 8e1cfa7

Please sign in to comment.