From dc96cd0403439372e09161899eed1f30af7d55a2 Mon Sep 17 00:00:00 2001 From: Siyuan Zhang Date: Tue, 14 May 2024 15:36:19 -0700 Subject: [PATCH] robustness: add mix version option in exploratoryScenarios. Signed-off-by: Siyuan Zhang --- tests/framework/e2e/cluster.go | 42 +++++++------- tests/framework/e2e/etcd_process.go | 15 +++++ tests/robustness/failpoint/cluster.go | 5 +- tests/robustness/failpoint/failpoint.go | 27 ++++----- tests/robustness/failpoint/gofail.go | 1 + tests/robustness/failpoint/network.go | 4 +- tests/robustness/main_test.go | 22 +++---- .../options/server_config_options.go | 4 ++ tests/robustness/scenarios.go | 58 +++++++++---------- 9 files changed, 101 insertions(+), 77 deletions(-) diff --git a/tests/framework/e2e/cluster.go b/tests/framework/e2e/cluster.go index 8f3a102c3059..e9f4b6388fea 100644 --- a/tests/framework/e2e/cluster.go +++ b/tests/framework/e2e/cluster.go @@ -570,27 +570,6 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfig(tb testing.TB, i in args = append(args, "--discovery="+cfg.Discovery) } - defaultValues := values(*embed.NewConfig()) - overrideValues := values(cfg.ServerConfig) - for flag, value := range overrideValues { - if defaultValue := defaultValues[flag]; value == "" || value == defaultValue { - continue - } - if flag == "experimental-snapshot-catchup-entries" && !(cfg.Version == CurrentVersion || (cfg.Version == MinorityLastVersion && i <= cfg.ClusterSize/2) || (cfg.Version == QuorumLastVersion && i > cfg.ClusterSize/2)) { - continue - } - args = append(args, fmt.Sprintf("--%s=%s", flag, value)) - } - envVars := map[string]string{} - for key, value := range cfg.EnvVars { - envVars[key] = value - } - var gofailPort int - if cfg.GoFailEnabled { - gofailPort = (i+1)*10000 + 2381 - envVars["GOFAIL_HTTP"] = fmt.Sprintf("127.0.0.1:%d", gofailPort) - } - var execPath string switch cfg.Version { case CurrentVersion: @@ -613,6 +592,27 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfig(tb testing.TB, i in panic(fmt.Sprintf("Unknown cluster version %v", cfg.Version)) } + defaultValues := values(*embed.NewConfig()) + overrideValues := values(cfg.ServerConfig) + for flag, value := range overrideValues { + if defaultValue := defaultValues[flag]; value == "" || value == defaultValue { + continue + } + if flag == "experimental-snapshot-catchup-entries" && !CouldSetSnapshotCatchupEntries(execPath) { + continue + } + args = append(args, fmt.Sprintf("--%s=%s", flag, value)) + } + envVars := map[string]string{} + for key, value := range cfg.EnvVars { + envVars[key] = value + } + var gofailPort int + if cfg.GoFailEnabled { + gofailPort = (i+1)*10000 + 2381 + envVars["GOFAIL_HTTP"] = fmt.Sprintf("127.0.0.1:%d", gofailPort) + } + return &EtcdServerProcessConfig{ lg: cfg.Logger, ExecPath: execPath, diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go index f9d2089a3e3b..170828a4b6a4 100644 --- a/tests/framework/e2e/etcd_process.go +++ b/tests/framework/e2e/etcd_process.go @@ -508,3 +508,18 @@ func GetVersionFromBinary(binaryPath string) (*semver.Version, error) { return nil, fmt.Errorf("could not find version in binary output of %s, lines outputted were %v", binaryPath, lines) } + +func CouldSetSnapshotCatchupEntries(execPath string) bool { + if !fileutil.Exist(execPath) { + // default to true if the binary does not exist, because binary might not exist for unit test, + // which does not really matter because "experimental-snapshot-catchup-entries" can be set for v3.6 and v3.5. + return true + } + v, err := GetVersionFromBinary(execPath) + if err != nil { + panic(err) + } + // snapshot-catchup-entries flag was backported in https://github.com/etcd-io/etcd/pull/17808 + v3_5_13 := semver.Version{Major: 3, Minor: 5, Patch: 13} + return v.Compare(v3_5_13) >= 0 +} diff --git a/tests/robustness/failpoint/cluster.go b/tests/robustness/failpoint/cluster.go index 2a68fcb73ea6..502b866236b9 100644 --- a/tests/robustness/failpoint/cluster.go +++ b/tests/robustness/failpoint/cluster.go @@ -135,8 +135,9 @@ func (f memberReplace) Name() string { return "MemberReplace" } -func (f memberReplace) Available(config e2e.EtcdProcessClusterConfig, _ e2e.EtcdProcess) bool { - return config.ClusterSize > 1 +func (f memberReplace) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool { + // a lower etcd version may not be able to join a cluster with higher cluster version. + return config.ClusterSize > 1 && member.Config().ExecPath != e2e.BinPath.EtcdLastRelease } func getID(ctx context.Context, cc clientv3.Cluster, name string) (id uint64, found bool, err error) { diff --git a/tests/robustness/failpoint/failpoint.go b/tests/robustness/failpoint/failpoint.go index 14e6ddf7e940..1ddbcfc7e134 100644 --- a/tests/robustness/failpoint/failpoint.go +++ b/tests/robustness/failpoint/failpoint.go @@ -36,20 +36,21 @@ const ( var ( allFailpoints = []Failpoint{ - KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic, - DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic, - BackendBeforeStartDBTxnPanic, BackendAfterStartDBTxnPanic, BackendBeforeWritebackBufPanic, - BackendAfterWritebackBufPanic, CompactBeforeCommitScheduledCompactPanic, CompactAfterCommitScheduledCompactPanic, - CompactBeforeSetFinishedCompactPanic, CompactAfterSetFinishedCompactPanic, CompactBeforeCommitBatchPanic, - CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork, - RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic, - RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot, - BeforeApplyOneConfChangeSleep, - MemberReplace, + // KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic, + // DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic, + // BackendBeforeStartDBTxnPanic, BackendAfterStartDBTxnPanic, BackendBeforeWritebackBufPanic, + // BackendAfterWritebackBufPanic, CompactBeforeCommitScheduledCompactPanic, CompactAfterCommitScheduledCompactPanic, + // CompactBeforeSetFinishedCompactPanic, CompactAfterSetFinishedCompactPanic, CompactBeforeCommitBatchPanic, + // CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork, + // RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic, + // RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot, + // BeforeApplyOneConfChangeSleep, + // MemberReplace, DropPeerNetwork, - RaftBeforeSaveSleep, - RaftAfterSaveSleep, - ApplyBeforeOpenSnapshot, + // RaftBeforeSaveSleep, + // RaftAfterSaveSleep, + // ApplyBeforeOpenSnapshot, + RaftAfterApplySnapPanic, } ) diff --git a/tests/robustness/failpoint/gofail.go b/tests/robustness/failpoint/gofail.go index 2e85798bacc1..c37ea0432a57 100644 --- a/tests/robustness/failpoint/gofail.go +++ b/tests/robustness/failpoint/gofail.go @@ -105,6 +105,7 @@ func (f goPanicFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logg reports = append(reports, r...) } } + fmt.Printf("sizhangDebug: Waiting for member %s to exit\n", member.Config().ExecPath) lg.Info("Waiting for member to exit", zap.String("member", member.Config().Name)) err = member.Wait(ctx) if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { diff --git a/tests/robustness/failpoint/network.go b/tests/robustness/failpoint/network.go index d202454bcdf5..2bd5789eeb8f 100644 --- a/tests/robustness/failpoint/network.go +++ b/tests/robustness/failpoint/network.go @@ -16,6 +16,7 @@ package failpoint import ( "context" + "fmt" "math/rand" "testing" "time" @@ -59,7 +60,8 @@ func (tb triggerBlackhole) Trigger(ctx context.Context, t *testing.T, member e2e func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, process e2e.EtcdProcess) bool { // Avoid triggering failpoint if waiting for failpoint would take too long to fit into timeout. // Number of required entries for snapshot depends on etcd configuration. - if tb.waitTillSnapshot && entriesToGuaranteeSnapshot(config) > 200 { + fmt.Printf("sizhangDebug: entriesToGuaranteeSnapshot=%d, ExecPath=%s\n", entriesToGuaranteeSnapshot(config), process.Config().ExecPath) + if tb.waitTillSnapshot && (entriesToGuaranteeSnapshot(config) > 200 || !e2e.CouldSetSnapshotCatchupEntries(process.Config().ExecPath)) { return false } return config.ClusterSize > 1 && process.PeerProxy() != nil diff --git a/tests/robustness/main_test.go b/tests/robustness/main_test.go index 1d078574ff2d..7312d834d5b6 100644 --- a/tests/robustness/main_test.go +++ b/tests/robustness/main_test.go @@ -51,17 +51,17 @@ func TestRobustnessExploratory(t *testing.T) { } } -func TestRobustnessRegression(t *testing.T) { - testRunner.BeforeTest(t) - for _, scenario := range regressionScenarios(t) { - t.Run(scenario.name, func(t *testing.T) { - lg := zaptest.NewLogger(t) - scenario.cluster.Logger = lg - ctx := context.Background() - testRobustness(ctx, t, lg, scenario) - }) - } -} +// func TestRobustnessRegression(t *testing.T) { +// testRunner.BeforeTest(t) +// for _, scenario := range regressionScenarios(t) { +// t.Run(scenario.name, func(t *testing.T) { +// lg := zaptest.NewLogger(t) +// scenario.cluster.Logger = lg +// ctx := context.Background() +// testRobustness(ctx, t, lg, scenario) +// }) +// } +// } func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testScenario) { r := report.TestReport{Logger: lg} diff --git a/tests/robustness/options/server_config_options.go b/tests/robustness/options/server_config_options.go index 5018d1bf90aa..4471a869fd07 100644 --- a/tests/robustness/options/server_config_options.go +++ b/tests/robustness/options/server_config_options.go @@ -49,3 +49,7 @@ func WithExperimentalWatchProgressNotifyInterval(input ...time.Duration) e2e.EPC c.ServerConfig.ExperimentalWatchProgressNotifyInterval = input[internalRand.Intn(len(input))] } } + +func WithVersion(input ...e2e.ClusterVersion) e2e.EPClusterOption { + return func(c *e2e.EtcdProcessClusterConfig) { c.Version = input[internalRand.Intn(len(input))] } +} diff --git a/tests/robustness/scenarios.go b/tests/robustness/scenarios.go index 7cd121b6e39f..34b574d6a41b 100644 --- a/tests/robustness/scenarios.go +++ b/tests/robustness/scenarios.go @@ -19,9 +19,8 @@ import ( "testing" "time" - "github.com/coreos/go-semver/semver" - "go.etcd.io/etcd/api/v3/version" + "go.etcd.io/etcd/client/pkg/v3/fileutil" "go.etcd.io/etcd/tests/v3/framework/e2e" "go.etcd.io/etcd/tests/v3/robustness/failpoint" "go.etcd.io/etcd/tests/v3/robustness/options" @@ -61,12 +60,8 @@ type testScenario struct { watch watchConfig } -func exploratoryScenarios(t *testing.T) []testScenario { - v, err := e2e.GetVersionFromBinary(e2e.BinPath.Etcd) - if err != nil { - t.Fatalf("Failed checking etcd version binary, binary: %q, err: %v", e2e.BinPath.Etcd, err) - } - enableLazyFS := e2e.BinPath.LazyFSAvailable() +func exploratoryScenarios(_ *testing.T) []testScenario { + // enableLazyFS := e2e.BinPath.LazyFSAvailable() randomizableOptions := []e2e.EPClusterOption{ options.WithClusterOptionGroups( options.ClusterOptions{options.WithTickMs(29), options.WithElectionMs(271)}, @@ -74,6 +69,10 @@ func exploratoryScenarios(t *testing.T) []testScenario { options.ClusterOptions{options.WithTickMs(100), options.WithElectionMs(2000)}), } + // 66% current version, 33% MinorityLastVersion and QuorumLastVersion + mixedVersionOption := options.WithVersion(e2e.CurrentVersion, e2e.CurrentVersion, e2e.CurrentVersion, + e2e.CurrentVersion, e2e.MinorityLastVersion, e2e.QuorumLastVersion) + baseOptions := []e2e.EPClusterOption{ options.WithSnapshotCount(50, 100, 1000), options.WithSubsetOptions(randomizableOptions...), @@ -81,34 +80,36 @@ func exploratoryScenarios(t *testing.T) []testScenario { e2e.WithCompactionBatchLimit(100), e2e.WithWatchProcessNotifyInterval(100 * time.Millisecond), } - // snapshot-catchup-entries flag was backported in https://github.com/etcd-io/etcd/pull/17808 - v3_5_13 := semver.Version{Major: 3, Minor: 5, Patch: 13} - if v.Compare(v3_5_13) >= 0 { + + if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) { baseOptions = append(baseOptions, e2e.WithSnapshotCatchUpEntries(100)) } scenarios := []testScenario{} - for _, tp := range trafficProfiles { - name := filepath.Join(tp.Traffic.Name(), tp.Profile.Name, "ClusterOfSize1") - clusterOfSize1Options := baseOptions - clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithClusterSize(1)) - // Add LazyFS only for traffic with lower QPS as it uses a lot of CPU lowering minimal QPS. - if enableLazyFS && tp.Profile.MinimalQPS <= 100 { - clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithLazyFSEnabled(true)) - name = filepath.Join(name, "LazyFS") - } - scenarios = append(scenarios, testScenario{ - name: name, - traffic: tp.Traffic, - profile: tp.Profile, - cluster: *e2e.NewConfig(clusterOfSize1Options...), - }) - } + // for _, tp := range trafficProfiles { + // name := filepath.Join(tp.Traffic.Name(), tp.Profile.Name, "ClusterOfSize1") + // clusterOfSize1Options := baseOptions + // clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithClusterSize(1)) + // // Add LazyFS only for traffic with lower QPS as it uses a lot of CPU lowering minimal QPS. + // if enableLazyFS && tp.Profile.MinimalQPS <= 100 { + // clusterOfSize1Options = append(clusterOfSize1Options, e2e.WithLazyFSEnabled(true)) + // name = filepath.Join(name, "LazyFS") + // } + // scenarios = append(scenarios, testScenario{ + // name: name, + // traffic: tp.Traffic, + // profile: tp.Profile, + // cluster: *e2e.NewConfig(clusterOfSize1Options...), + // }) + // } for _, tp := range trafficProfiles { name := filepath.Join(tp.Traffic.Name(), tp.Profile.Name, "ClusterOfSize3") clusterOfSize3Options := baseOptions clusterOfSize3Options = append(clusterOfSize3Options, e2e.WithIsPeerTLS(true)) clusterOfSize3Options = append(clusterOfSize3Options, e2e.WithPeerProxy(true)) + if fileutil.Exist(e2e.BinPath.EtcdLastRelease) { + clusterOfSize3Options = append(clusterOfSize3Options, mixedVersionOption) + } scenarios = append(scenarios, testScenario{ name: name, traffic: tp.Traffic, @@ -172,8 +173,7 @@ func regressionScenarios(t *testing.T) []testScenario { e2e.WithPeerProxy(true), e2e.WithIsPeerTLS(true), } - v3_5_13 := semver.Version{Major: 3, Minor: 5, Patch: 13} - if v.Compare(v3_5_13) >= 0 { + if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) { opts = append(opts, e2e.WithSnapshotCatchUpEntries(100)) } scenarios = append(scenarios, testScenario{