Skip to content

Commit

Permalink
robustness: add mixed-version option in exploratoryScenarios.
Browse files Browse the repository at this point in the history
Signed-off-by: Siyuan Zhang <[email protected]>
  • Loading branch information
siyuanfoundation committed May 15, 2024
1 parent 62da593 commit dc96cd0
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 77 deletions.
42 changes: 21 additions & 21 deletions tests/framework/e2e/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,27 +570,6 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfig(tb testing.TB, i in
args = append(args, "--discovery="+cfg.Discovery)
}

defaultValues := values(*embed.NewConfig())
overrideValues := values(cfg.ServerConfig)
for flag, value := range overrideValues {
if defaultValue := defaultValues[flag]; value == "" || value == defaultValue {
continue
}
if flag == "experimental-snapshot-catchup-entries" && !(cfg.Version == CurrentVersion || (cfg.Version == MinorityLastVersion && i <= cfg.ClusterSize/2) || (cfg.Version == QuorumLastVersion && i > cfg.ClusterSize/2)) {
continue
}
args = append(args, fmt.Sprintf("--%s=%s", flag, value))
}
envVars := map[string]string{}
for key, value := range cfg.EnvVars {
envVars[key] = value
}
var gofailPort int
if cfg.GoFailEnabled {
gofailPort = (i+1)*10000 + 2381
envVars["GOFAIL_HTTP"] = fmt.Sprintf("127.0.0.1:%d", gofailPort)
}

var execPath string
switch cfg.Version {
case CurrentVersion:
Expand All @@ -613,6 +592,27 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfig(tb testing.TB, i in
panic(fmt.Sprintf("Unknown cluster version %v", cfg.Version))
}

defaultValues := values(*embed.NewConfig())
overrideValues := values(cfg.ServerConfig)
for flag, value := range overrideValues {
if defaultValue := defaultValues[flag]; value == "" || value == defaultValue {
continue
}
if flag == "experimental-snapshot-catchup-entries" && !CouldSetSnapshotCatchupEntries(execPath) {
continue
}
args = append(args, fmt.Sprintf("--%s=%s", flag, value))
}
envVars := map[string]string{}
for key, value := range cfg.EnvVars {
envVars[key] = value
}
var gofailPort int
if cfg.GoFailEnabled {
gofailPort = (i+1)*10000 + 2381
envVars["GOFAIL_HTTP"] = fmt.Sprintf("127.0.0.1:%d", gofailPort)
}

return &EtcdServerProcessConfig{
lg: cfg.Logger,
ExecPath: execPath,
Expand Down
15 changes: 15 additions & 0 deletions tests/framework/e2e/etcd_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,18 @@ func GetVersionFromBinary(binaryPath string) (*semver.Version, error) {

return nil, fmt.Errorf("could not find version in binary output of %s, lines outputted were %v", binaryPath, lines)
}

// CouldSetSnapshotCatchupEntries reports whether the etcd binary at execPath
// supports the "experimental-snapshot-catchup-entries" flag.
//
// If the binary does not exist it defaults to true, because the binary might
// not exist for unit tests — which does not really matter, because
// "experimental-snapshot-catchup-entries" can be set for v3.6 and v3.5.
// It panics if the binary exists but its version cannot be determined.
func CouldSetSnapshotCatchupEntries(execPath string) bool {
	if !fileutil.Exist(execPath) {
		return true
	}
	v, err := GetVersionFromBinary(execPath)
	if err != nil {
		panic(err)
	}
	// The snapshot-catchup-entries flag was backported to v3.5 in
	// https://github.com/etcd-io/etcd/pull/17808, first released in v3.5.13.
	minSupported := semver.Version{Major: 3, Minor: 5, Patch: 13}
	return v.Compare(minSupported) >= 0
}
5 changes: 3 additions & 2 deletions tests/robustness/failpoint/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,9 @@ func (f memberReplace) Name() string {
return "MemberReplace"
}

func (f memberReplace) Available(config e2e.EtcdProcessClusterConfig, _ e2e.EtcdProcess) bool {
return config.ClusterSize > 1
// Available reports whether the MemberReplace failpoint can be injected for
// the given member. It needs a multi-member cluster, and skips members
// running the last-release binary.
func (f memberReplace) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool {
	if config.ClusterSize <= 1 {
		return false
	}
	// a lower etcd version may not be able to join a cluster with higher cluster version.
	return member.Config().ExecPath != e2e.BinPath.EtcdLastRelease
}

func getID(ctx context.Context, cc clientv3.Cluster, name string) (id uint64, found bool, err error) {
Expand Down
27 changes: 14 additions & 13 deletions tests/robustness/failpoint/failpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,21 @@ const (

var (
	// allFailpoints lists every failpoint the robustness tests may inject.
	// Keep the full list enabled — never commit a temporarily narrowed list
	// used for local debugging.
	allFailpoints = []Failpoint{
		KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic,
		DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic,
		BackendBeforeStartDBTxnPanic, BackendAfterStartDBTxnPanic, BackendBeforeWritebackBufPanic,
		BackendAfterWritebackBufPanic, CompactBeforeCommitScheduledCompactPanic, CompactAfterCommitScheduledCompactPanic,
		CompactBeforeSetFinishedCompactPanic, CompactAfterSetFinishedCompactPanic, CompactBeforeCommitBatchPanic,
		CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork,
		RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic,
		RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot,
		BeforeApplyOneConfChangeSleep,
		MemberReplace,
		DropPeerNetwork,
		RaftBeforeSaveSleep,
		RaftAfterSaveSleep,
		ApplyBeforeOpenSnapshot,
	}
)

Expand Down
1 change: 1 addition & 0 deletions tests/robustness/failpoint/gofail.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func (f goPanicFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logg
reports = append(reports, r...)
}
}
fmt.Printf("sizhangDebug: Waiting for member %s to exit\n", member.Config().ExecPath)
lg.Info("Waiting for member to exit", zap.String("member", member.Config().Name))
err = member.Wait(ctx)
if err != nil && !strings.Contains(err.Error(), "unexpected exit code") {
Expand Down
4 changes: 3 additions & 1 deletion tests/robustness/failpoint/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package failpoint

import (
"context"
"fmt"
"math/rand"
"testing"
"time"
Expand Down Expand Up @@ -59,7 +60,8 @@ func (tb triggerBlackhole) Trigger(ctx context.Context, t *testing.T, member e2e
func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, process e2e.EtcdProcess) bool {
// Avoid triggering failpoint if waiting for failpoint would take too long to fit into timeout.
// Number of required entries for snapshot depends on etcd configuration.
if tb.waitTillSnapshot && entriesToGuaranteeSnapshot(config) > 200 {
fmt.Printf("sizhangDebug: entriesToGuaranteeSnapshot=%d, ExecPath=%s\n", entriesToGuaranteeSnapshot(config), process.Config().ExecPath)
if tb.waitTillSnapshot && (entriesToGuaranteeSnapshot(config) > 200 || !e2e.CouldSetSnapshotCatchupEntries(process.Config().ExecPath)) {
return false
}
return config.ClusterSize > 1 && process.PeerProxy() != nil
Expand Down
22 changes: 11 additions & 11 deletions tests/robustness/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,17 @@ func TestRobustnessExploratory(t *testing.T) {
}
}

// TestRobustnessRegression runs every regression robustness scenario as a
// subtest. The test must stay enabled — do not commit it commented out for
// local debugging.
func TestRobustnessRegression(t *testing.T) {
	testRunner.BeforeTest(t)
	for _, scenario := range regressionScenarios(t) {
		t.Run(scenario.name, func(t *testing.T) {
			lg := zaptest.NewLogger(t)
			scenario.cluster.Logger = lg
			ctx := context.Background()
			testRobustness(ctx, t, lg, scenario)
		})
	}
}

func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testScenario) {
r := report.TestReport{Logger: lg}
Expand Down
4 changes: 4 additions & 0 deletions tests/robustness/options/server_config_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,7 @@ func WithExperimentalWatchProgressNotifyInterval(input ...time.Duration) e2e.EPC
c.ServerConfig.ExperimentalWatchProgressNotifyInterval = input[internalRand.Intn(len(input))]
}
}

// WithVersion returns a cluster option that assigns one of the provided
// cluster versions, chosen uniformly at random, to the config.
// Passing repeated values weights the distribution accordingly.
func WithVersion(versions ...e2e.ClusterVersion) e2e.EPClusterOption {
	return func(c *e2e.EtcdProcessClusterConfig) {
		picked := versions[internalRand.Intn(len(versions))]
		c.Version = picked
	}
}
58 changes: 29 additions & 29 deletions tests/robustness/scenarios.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ import (
"testing"
"time"

"github.com/coreos/go-semver/semver"

"go.etcd.io/etcd/api/v3/version"
"go.etcd.io/etcd/client/pkg/v3/fileutil"
"go.etcd.io/etcd/tests/v3/framework/e2e"
"go.etcd.io/etcd/tests/v3/robustness/failpoint"
"go.etcd.io/etcd/tests/v3/robustness/options"
Expand Down Expand Up @@ -61,54 +60,56 @@ type testScenario struct {
watch watchConfig
}

func exploratoryScenarios(t *testing.T) []testScenario {
v, err := e2e.GetVersionFromBinary(e2e.BinPath.Etcd)
if err != nil {
t.Fatalf("Failed checking etcd version binary, binary: %q, err: %v", e2e.BinPath.Etcd, err)
}
enableLazyFS := e2e.BinPath.LazyFSAvailable()
func exploratoryScenarios(_ *testing.T) []testScenario {
// enableLazyFS := e2e.BinPath.LazyFSAvailable()
randomizableOptions := []e2e.EPClusterOption{
options.WithClusterOptionGroups(
options.ClusterOptions{options.WithTickMs(29), options.WithElectionMs(271)},
options.ClusterOptions{options.WithTickMs(101), options.WithElectionMs(521)},
options.ClusterOptions{options.WithTickMs(100), options.WithElectionMs(2000)}),
}

// 66% current version, 33% MinorityLastVersion and QuorumLastVersion
mixedVersionOption := options.WithVersion(e2e.CurrentVersion, e2e.CurrentVersion, e2e.CurrentVersion,
e2e.CurrentVersion, e2e.MinorityLastVersion, e2e.QuorumLastVersion)

baseOptions := []e2e.EPClusterOption{
options.WithSnapshotCount(50, 100, 1000),
options.WithSubsetOptions(randomizableOptions...),
e2e.WithGoFailEnabled(true),
e2e.WithCompactionBatchLimit(100),
e2e.WithWatchProcessNotifyInterval(100 * time.Millisecond),
}
// snapshot-catchup-entries flag was backported in https://github.com/etcd-io/etcd/pull/17808
v3_5_13 := semver.Version{Major: 3, Minor: 5, Patch: 13}
if v.Compare(v3_5_13) >= 0 {

if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) {
baseOptions = append(baseOptions, e2e.WithSnapshotCatchUpEntries(100))
}
scenarios := []testScenario{}
for _, tp := range trafficProfiles {
name := filepath.Join(tp.Traffic.Name(), tp.Profile.Name, "ClusterOfSize1")
clusterOfSize1Options := baseOptions
clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithClusterSize(1))
// Add LazyFS only for traffic with lower QPS as it uses a lot of CPU lowering minimal QPS.
if enableLazyFS && tp.Profile.MinimalQPS <= 100 {
clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithLazyFSEnabled(true))
name = filepath.Join(name, "LazyFS")
}
scenarios = append(scenarios, testScenario{
name: name,
traffic: tp.Traffic,
profile: tp.Profile,
cluster: *e2e.NewConfig(clusterOfSize1Options...),
})
}
// for _, tp := range trafficProfiles {
// name := filepath.Join(tp.Traffic.Name(), tp.Profile.Name, "ClusterOfSize1")
// clusterOfSize1Options := baseOptions
// clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithClusterSize(1))
// // Add LazyFS only for traffic with lower QPS as it uses a lot of CPU lowering minimal QPS.
// if enableLazyFS && tp.Profile.MinimalQPS <= 100 {
// clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithLazyFSEnabled(true))
// name = filepath.Join(name, "LazyFS")
// }
// scenarios = append(scenarios, testScenario{
// name: name,
// traffic: tp.Traffic,
// profile: tp.Profile,
// cluster: *e2e.NewConfig(clusterOfSize1Options...),
// })
// }

for _, tp := range trafficProfiles {
name := filepath.Join(tp.Traffic.Name(), tp.Profile.Name, "ClusterOfSize3")
clusterOfSize3Options := baseOptions
clusterOfSize3Options = append(clusterOfSize3Options, e2e.WithIsPeerTLS(true))
clusterOfSize3Options = append(clusterOfSize3Options, e2e.WithPeerProxy(true))
if fileutil.Exist(e2e.BinPath.EtcdLastRelease) {
clusterOfSize3Options = append(clusterOfSize3Options, mixedVersionOption)
}
scenarios = append(scenarios, testScenario{
name: name,
traffic: tp.Traffic,
Expand Down Expand Up @@ -172,8 +173,7 @@ func regressionScenarios(t *testing.T) []testScenario {
e2e.WithPeerProxy(true),
e2e.WithIsPeerTLS(true),
}
v3_5_13 := semver.Version{Major: 3, Minor: 5, Patch: 13}
if v.Compare(v3_5_13) >= 0 {
if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) {
opts = append(opts, e2e.WithSnapshotCatchUpEntries(100))
}
scenarios = append(scenarios, testScenario{
Expand Down

0 comments on commit dc96cd0

Please sign in to comment.