From d04aec6847589d02ba680bd6104e1342838d5847 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 24 Oct 2024 14:04:17 -0600 Subject: [PATCH 1/8] Black box test to reproduce unreachable error in backups Signed-off-by: Florent Poinsard --- go/vt/mysqlctl/backup_blackbox_test.go | 105 +++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/go/vt/mysqlctl/backup_blackbox_test.go b/go/vt/mysqlctl/backup_blackbox_test.go index 15244fb8782..a1851b57388 100644 --- a/go/vt/mysqlctl/backup_blackbox_test.go +++ b/go/vt/mysqlctl/backup_blackbox_test.go @@ -406,6 +406,111 @@ func TestExecuteBackupWithCanceledContext(t *testing.T) { assert.Equal(t, mysqlctl.BackupUnusable, backupResult) } +// This test triggers a certain code path that only happens a backup file fails to be backed up, +// only and only if, all the other backup files have either started or finished. When we reach +// this scenario, files no longer try to acquire the semaphore and thus the backup cannot fail +// because of context deadline when acquiring it. At this point, the only place where the backup +// can fail, is if the return of be.backupFiles fails, and we record the error correctly. +// This test specifically test this scenario and arose because of issue .. +// The test does: +// 1. Create the backup and data directory +// 2. Create a keyspace and shard +// 3. Already create the last backup file that would be created +// 4. Remove all permissions on this file +// 5. Execute the restore +// 6. The restore must fail due to an error on file number 3 ("cannot add file: 3") +func TestExecuteBackupWithFailureOnLastFile(t *testing.T) { + ctx := utils.LeakCheckContext(t) + + // Set up local backup directory + id := fmt.Sprintf("%d", time.Now().UnixNano()) + backupRoot := fmt.Sprintf("testdata/builtinbackup_test_%s", id) + filebackupstorage.FileBackupStorageRoot = backupRoot + require.NoError(t, createBackupDir(backupRoot, "innodb", "log", "datadir")) + dataDir := path.Join(backupRoot, "datadir") + fmt.Println(dataDir) + // Add some files under data directory to force backup to execute semaphore acquire inside + // backupFiles() method (https://github.com/vitessio/vitess/blob/main/go/vt/mysqlctl/builtinbackupengine.go#L483). + require.NoError(t, createBackupDir(dataDir, "test1")) + require.NoError(t, createBackupDir(dataDir, "test2")) + require.NoError(t, createBackupFiles(path.Join(dataDir, "test1"), 2, "ibd")) + require.NoError(t, createBackupFiles(path.Join(dataDir, "test2"), 2, "ibd")) + defer os.RemoveAll(backupRoot) + + needIt, err := needInnoDBRedoLogSubdir() + require.NoError(t, err) + if needIt { + fpath := path.Join("log", mysql.DynamicRedoLogSubdir) + if err := createBackupDir(backupRoot, fpath); err != nil { + require.Failf(t, err.Error(), "failed to create directory: %s", fpath) + } + } + + // Set up topo + keyspace, shard := "mykeyspace", "-80" + ts := memorytopo.NewServer(ctx, "cell1") + defer ts.Close() + + require.NoError(t, ts.CreateKeyspace(ctx, keyspace, &topodata.Keyspace{})) + require.NoError(t, ts.CreateShard(ctx, keyspace, shard)) + + tablet := topo.NewTablet(100, "cell1", "mykeyspace-00-80-0100") + tablet.Keyspace = keyspace + tablet.Shard = shard + + require.NoError(t, ts.CreateTablet(ctx, tablet)) + + _, err = ts.UpdateShardFields(ctx, keyspace, shard, func(si *topo.ShardInfo) error { + si.PrimaryAlias = &topodata.TabletAlias{Uid: 100, Cell: "cell1"} + + now := time.Now() + si.PrimaryTermStartTime = &vttime.Time{Seconds: int64(now.Second()), Nanoseconds: int32(now.Nanosecond())} + + return nil + }) + + require.NoError(t, err) + + be := &mysqlctl.BuiltinBackupEngine{} + bh := filebackupstorage.NewBackupHandle(nil, "", "", false) + // Spin up a fake daemon to be used in backups. It needs to be allowed to receive: + // "STOP REPLICA", "START REPLICA", in that order. + fakedb := fakesqldb.New(t) + defer fakedb.Close() + mysqld := mysqlctl.NewFakeMysqlDaemon(fakedb) + defer mysqld.Close() + mysqld.ExpectedExecuteSuperQueryList = []string{"STOP REPLICA", "START REPLICA"} + + // With this setup, 4 backup files will be created (0, 1, 2, 3). For the last file (3), we create + // it in advance and remove all permission on the file so that the backup be.ExecuteBackup will not + // be able to override the file and thus will fail. Triggering the error mechanism after calling be.backupFile. + lastBackupFile := path.Join(backupRoot, "3") + f, err := os.Create(lastBackupFile) + require.NoError(t, err) + f.Write(make([]byte, 1024)) + require.NoError(t, os.Chmod(lastBackupFile, 0000)) + + backupResult, err := be.ExecuteBackup(ctx, mysqlctl.BackupParams{ + Logger: logutil.NewConsoleLogger(), + Mysqld: mysqld, + Cnf: &mysqlctl.Mycnf{ + InnodbDataHomeDir: path.Join(backupRoot, "innodb"), + InnodbLogGroupHomeDir: path.Join(backupRoot, "log"), + DataDir: path.Join(backupRoot, "datadir"), + }, + Stats: backupstats.NewFakeStats(), + Concurrency: 4, + HookExtraEnv: map[string]string{}, + TopoServer: ts, + Keyspace: keyspace, + Shard: shard, + MysqlShutdownTimeout: mysqlShutdownTimeout, + }, bh) + + require.ErrorContains(t, err, "cannot add file: 3") + assert.Equal(t, mysqlctl.BackupUnusable, backupResult) +} + // TestExecuteRestoreWithCanceledContext tests the ability of the restore function to gracefully handle cases where errors // occur due to various reasons, such as context timed-out. The process should not panic in these situations. func TestExecuteRestoreWithTimedOutContext(t *testing.T) { From 1f3e60e5d3a1c1fb022b656aacd22a6d4f3da4b8 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 24 Oct 2024 14:13:16 -0600 Subject: [PATCH 2/8] Add link to issue Signed-off-by: Florent Poinsard --- go/vt/mysqlctl/backup_blackbox_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/mysqlctl/backup_blackbox_test.go b/go/vt/mysqlctl/backup_blackbox_test.go index a1851b57388..c78e2eb42f1 100644 --- a/go/vt/mysqlctl/backup_blackbox_test.go +++ b/go/vt/mysqlctl/backup_blackbox_test.go @@ -411,7 +411,7 @@ func TestExecuteBackupWithCanceledContext(t *testing.T) { // this scenario, files no longer try to acquire the semaphore and thus the backup cannot fail // because of context deadline when acquiring it. At this point, the only place where the backup // can fail, is if the return of be.backupFiles fails, and we record the error correctly. -// This test specifically test this scenario and arose because of issue .. +// This test specifically test this scenario and arose because of issue https://github.com/vitessio/vitess/issues/17063 // The test does: // 1. Create the backup and data directory // 2. Create a keyspace and shard From 73e48eed7fa32cb64debc9aa72e78303d9c1a17c Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 24 Oct 2024 14:13:33 -0600 Subject: [PATCH 3/8] Use proper error variable Signed-off-by: Florent Poinsard --- go/vt/mysqlctl/builtinbackupengine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/mysqlctl/builtinbackupengine.go b/go/vt/mysqlctl/builtinbackupengine.go index 9c9403c948c..b9a9846d91e 100644 --- a/go/vt/mysqlctl/builtinbackupengine.go +++ b/go/vt/mysqlctl/builtinbackupengine.go @@ -645,7 +645,7 @@ func (be *BuiltinBackupEngine) backupFiles( name := fmt.Sprintf("%v", i) err := be.backupFile(ctxCancel, params, bh, fe, name) if err != nil { - bh.RecordError(acqErr) + bh.RecordError(err) cancel() } }(i) From 0e87ef5594df17cb9014a955b8dc7daee60eb85f Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 24 Oct 2024 14:36:56 -0600 Subject: [PATCH 4/8] Remove unwanted log in unit test Signed-off-by: Florent Poinsard --- go/vt/mysqlctl/backup_blackbox_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/vt/mysqlctl/backup_blackbox_test.go b/go/vt/mysqlctl/backup_blackbox_test.go index c78e2eb42f1..86f012d70a6 100644 --- a/go/vt/mysqlctl/backup_blackbox_test.go +++ b/go/vt/mysqlctl/backup_blackbox_test.go @@ -428,7 +428,6 @@ func TestExecuteBackupWithFailureOnLastFile(t *testing.T) { filebackupstorage.FileBackupStorageRoot = backupRoot require.NoError(t, createBackupDir(backupRoot, "innodb", "log", "datadir")) dataDir := path.Join(backupRoot, "datadir") - fmt.Println(dataDir) // Add some files under data directory to force backup to execute semaphore acquire inside // backupFiles() method (https://github.com/vitessio/vitess/blob/main/go/vt/mysqlctl/builtinbackupengine.go#L483). require.NoError(t, createBackupDir(dataDir, "test1")) From bcdfcba60b5176e396eca3979085ada5332a4d80 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 24 Oct 2024 18:35:08 -0600 Subject: [PATCH 5/8] Extract test to a non-race test Signed-off-by: Florent Poinsard --- go/vt/mysqlctl/backup_blackbox_race_test.go | 152 ++++++++++++++++++++ go/vt/mysqlctl/backup_blackbox_test.go | 104 -------------- 2 files changed, 152 insertions(+), 104 deletions(-) create mode 100644 go/vt/mysqlctl/backup_blackbox_race_test.go diff --git a/go/vt/mysqlctl/backup_blackbox_race_test.go b/go/vt/mysqlctl/backup_blackbox_race_test.go new file mode 100644 index 00000000000..4e81b74e7fe --- /dev/null +++ b/go/vt/mysqlctl/backup_blackbox_race_test.go @@ -0,0 +1,152 @@ +//go:build !race + +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package mysqlctl_test is the blackbox tests for package mysqlctl. +package mysqlctl_test + +import ( + "fmt" + "os" + "path" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/mysql/fakesqldb" + "vitess.io/vitess/go/test/utils" + "vitess.io/vitess/go/vt/logutil" + "vitess.io/vitess/go/vt/mysqlctl" + "vitess.io/vitess/go/vt/mysqlctl/backupstats" + "vitess.io/vitess/go/vt/mysqlctl/filebackupstorage" + "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/proto/vttime" + "vitess.io/vitess/go/vt/topo" + "vitess.io/vitess/go/vt/topo/memorytopo" +) + +// This test triggers a certain code path that only happens a backup file fails to be backed up, +// only and only if, all the other backup files have either started or finished. When we reach +// this scenario, files no longer try to acquire the semaphore and thus the backup cannot fail +// because of context deadline when acquiring it. At this point, the only place where the backup +// can fail, is if the return of be.backupFiles fails, and we record the error correctly. +// This test specifically test this scenario and arose because of issue https://github.com/vitessio/vitess/issues/17063 +// The test does: +// 1. Create the backup and data directory +// 2. Create a keyspace and shard +// 3. Already create the last backup file that would be created +// 4. Remove all permissions on this file +// 5. Execute the restore +// 6. The restore must fail due to an error on file number 3 ("cannot add file: 3") +// +// This test is extracted into its own file that won't be run if we do 'go test -race' as this test +// exposes an old race condition that will be fixed soon after https://github.com/vitessio/vitess/pull/17062 +// Link to the race condition issue: https://github.com/vitessio/vitess/issues/17065 +func TestExecuteBackupWithFailureOnLastFile(t *testing.T) { + ctx := utils.LeakCheckContext(t) + + // Set up local backup directory + id := fmt.Sprintf("%d", time.Now().UnixNano()) + backupRoot := fmt.Sprintf("testdata/builtinbackup_test_%s", id) + filebackupstorage.FileBackupStorageRoot = backupRoot + require.NoError(t, createBackupDir(backupRoot, "innodb", "log", "datadir")) + dataDir := path.Join(backupRoot, "datadir") + // Add some files under data directory to force backup to execute semaphore acquire inside + // backupFiles() method (https://github.com/vitessio/vitess/blob/main/go/vt/mysqlctl/builtinbackupengine.go#L483). + require.NoError(t, createBackupDir(dataDir, "test1")) + require.NoError(t, createBackupDir(dataDir, "test2")) + require.NoError(t, createBackupFiles(path.Join(dataDir, "test1"), 2, "ibd")) + require.NoError(t, createBackupFiles(path.Join(dataDir, "test2"), 2, "ibd")) + defer os.RemoveAll(backupRoot) + + needIt, err := needInnoDBRedoLogSubdir() + require.NoError(t, err) + if needIt { + fpath := path.Join("log", mysql.DynamicRedoLogSubdir) + if err := createBackupDir(backupRoot, fpath); err != nil { + require.Failf(t, err.Error(), "failed to create directory: %s", fpath) + } + } + + // Set up topo + keyspace, shard := "mykeyspace", "-80" + ts := memorytopo.NewServer(ctx, "cell1") + defer ts.Close() + + require.NoError(t, ts.CreateKeyspace(ctx, keyspace, &topodata.Keyspace{})) + require.NoError(t, ts.CreateShard(ctx, keyspace, shard)) + + tablet := topo.NewTablet(100, "cell1", "mykeyspace-00-80-0100") + tablet.Keyspace = keyspace + tablet.Shard = shard + + require.NoError(t, ts.CreateTablet(ctx, tablet)) + + _, err = ts.UpdateShardFields(ctx, keyspace, shard, func(si *topo.ShardInfo) error { + si.PrimaryAlias = &topodata.TabletAlias{Uid: 100, Cell: "cell1"} + + now := time.Now() + si.PrimaryTermStartTime = &vttime.Time{Seconds: int64(now.Second()), Nanoseconds: int32(now.Nanosecond())} + + return nil + }) + + require.NoError(t, err) + + be := &mysqlctl.BuiltinBackupEngine{} + bh := filebackupstorage.NewBackupHandle(nil, "", "", false) + // Spin up a fake daemon to be used in backups. It needs to be allowed to receive: + // "STOP REPLICA", "START REPLICA", in that order. + fakedb := fakesqldb.New(t) + defer fakedb.Close() + mysqld := mysqlctl.NewFakeMysqlDaemon(fakedb) + defer mysqld.Close() + mysqld.ExpectedExecuteSuperQueryList = []string{"STOP REPLICA", "START REPLICA"} + + // With this setup, 4 backup files will be created (0, 1, 2, 3). For the last file (3), we create + // it in advance and remove all permission on the file so that the backup be.ExecuteBackup will not + // be able to override the file and thus will fail. Triggering the error mechanism after calling be.backupFile. + lastBackupFile := path.Join(backupRoot, "3") + f, err := os.Create(lastBackupFile) + require.NoError(t, err) + f.Write(make([]byte, 1024)) + require.NoError(t, f.Chmod(0444)) + require.NoError(t, f.Close()) + + backupResult, err := be.ExecuteBackup(ctx, mysqlctl.BackupParams{ + Logger: logutil.NewConsoleLogger(), + Mysqld: mysqld, + Cnf: &mysqlctl.Mycnf{ + InnodbDataHomeDir: path.Join(backupRoot, "innodb"), + InnodbLogGroupHomeDir: path.Join(backupRoot, "log"), + DataDir: path.Join(backupRoot, "datadir"), + }, + Stats: backupstats.NewFakeStats(), + Concurrency: 4, + HookExtraEnv: map[string]string{}, + TopoServer: ts, + Keyspace: keyspace, + Shard: shard, + MysqlShutdownTimeout: mysqlShutdownTimeout, + }, bh) + + require.ErrorContains(t, err, "cannot add file: 3") + assert.Equal(t, mysqlctl.BackupUnusable, backupResult) +} diff --git a/go/vt/mysqlctl/backup_blackbox_test.go b/go/vt/mysqlctl/backup_blackbox_test.go index 86f012d70a6..15244fb8782 100644 --- a/go/vt/mysqlctl/backup_blackbox_test.go +++ b/go/vt/mysqlctl/backup_blackbox_test.go @@ -406,110 +406,6 @@ func TestExecuteBackupWithCanceledContext(t *testing.T) { assert.Equal(t, mysqlctl.BackupUnusable, backupResult) } -// This test triggers a certain code path that only happens a backup file fails to be backed up, -// only and only if, all the other backup files have either started or finished. When we reach -// this scenario, files no longer try to acquire the semaphore and thus the backup cannot fail -// because of context deadline when acquiring it. At this point, the only place where the backup -// can fail, is if the return of be.backupFiles fails, and we record the error correctly. -// This test specifically test this scenario and arose because of issue https://github.com/vitessio/vitess/issues/17063 -// The test does: -// 1. Create the backup and data directory -// 2. Create a keyspace and shard -// 3. Already create the last backup file that would be created -// 4. Remove all permissions on this file -// 5. Execute the restore -// 6. The restore must fail due to an error on file number 3 ("cannot add file: 3") -func TestExecuteBackupWithFailureOnLastFile(t *testing.T) { - ctx := utils.LeakCheckContext(t) - - // Set up local backup directory - id := fmt.Sprintf("%d", time.Now().UnixNano()) - backupRoot := fmt.Sprintf("testdata/builtinbackup_test_%s", id) - filebackupstorage.FileBackupStorageRoot = backupRoot - require.NoError(t, createBackupDir(backupRoot, "innodb", "log", "datadir")) - dataDir := path.Join(backupRoot, "datadir") - // Add some files under data directory to force backup to execute semaphore acquire inside - // backupFiles() method (https://github.com/vitessio/vitess/blob/main/go/vt/mysqlctl/builtinbackupengine.go#L483). - require.NoError(t, createBackupDir(dataDir, "test1")) - require.NoError(t, createBackupDir(dataDir, "test2")) - require.NoError(t, createBackupFiles(path.Join(dataDir, "test1"), 2, "ibd")) - require.NoError(t, createBackupFiles(path.Join(dataDir, "test2"), 2, "ibd")) - defer os.RemoveAll(backupRoot) - - needIt, err := needInnoDBRedoLogSubdir() - require.NoError(t, err) - if needIt { - fpath := path.Join("log", mysql.DynamicRedoLogSubdir) - if err := createBackupDir(backupRoot, fpath); err != nil { - require.Failf(t, err.Error(), "failed to create directory: %s", fpath) - } - } - - // Set up topo - keyspace, shard := "mykeyspace", "-80" - ts := memorytopo.NewServer(ctx, "cell1") - defer ts.Close() - - require.NoError(t, ts.CreateKeyspace(ctx, keyspace, &topodata.Keyspace{})) - require.NoError(t, ts.CreateShard(ctx, keyspace, shard)) - - tablet := topo.NewTablet(100, "cell1", "mykeyspace-00-80-0100") - tablet.Keyspace = keyspace - tablet.Shard = shard - - require.NoError(t, ts.CreateTablet(ctx, tablet)) - - _, err = ts.UpdateShardFields(ctx, keyspace, shard, func(si *topo.ShardInfo) error { - si.PrimaryAlias = &topodata.TabletAlias{Uid: 100, Cell: "cell1"} - - now := time.Now() - si.PrimaryTermStartTime = &vttime.Time{Seconds: int64(now.Second()), Nanoseconds: int32(now.Nanosecond())} - - return nil - }) - - require.NoError(t, err) - - be := &mysqlctl.BuiltinBackupEngine{} - bh := filebackupstorage.NewBackupHandle(nil, "", "", false) - // Spin up a fake daemon to be used in backups. It needs to be allowed to receive: - // "STOP REPLICA", "START REPLICA", in that order. - fakedb := fakesqldb.New(t) - defer fakedb.Close() - mysqld := mysqlctl.NewFakeMysqlDaemon(fakedb) - defer mysqld.Close() - mysqld.ExpectedExecuteSuperQueryList = []string{"STOP REPLICA", "START REPLICA"} - - // With this setup, 4 backup files will be created (0, 1, 2, 3). For the last file (3), we create - // it in advance and remove all permission on the file so that the backup be.ExecuteBackup will not - // be able to override the file and thus will fail. Triggering the error mechanism after calling be.backupFile. - lastBackupFile := path.Join(backupRoot, "3") - f, err := os.Create(lastBackupFile) - require.NoError(t, err) - f.Write(make([]byte, 1024)) - require.NoError(t, os.Chmod(lastBackupFile, 0000)) - - backupResult, err := be.ExecuteBackup(ctx, mysqlctl.BackupParams{ - Logger: logutil.NewConsoleLogger(), - Mysqld: mysqld, - Cnf: &mysqlctl.Mycnf{ - InnodbDataHomeDir: path.Join(backupRoot, "innodb"), - InnodbLogGroupHomeDir: path.Join(backupRoot, "log"), - DataDir: path.Join(backupRoot, "datadir"), - }, - Stats: backupstats.NewFakeStats(), - Concurrency: 4, - HookExtraEnv: map[string]string{}, - TopoServer: ts, - Keyspace: keyspace, - Shard: shard, - MysqlShutdownTimeout: mysqlShutdownTimeout, - }, bh) - - require.ErrorContains(t, err, "cannot add file: 3") - assert.Equal(t, mysqlctl.BackupUnusable, backupResult) -} - // TestExecuteRestoreWithCanceledContext tests the ability of the restore function to gracefully handle cases where errors // occur due to various reasons, such as context timed-out. The process should not panic in these situations. func TestExecuteRestoreWithTimedOutContext(t *testing.T) { From 75cd42d0e0b0e09cded4a6e9d600256df7f41362 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 24 Oct 2024 21:46:29 -0600 Subject: [PATCH 6/8] Add release notes for known issue in v21.0.0 Signed-off-by: Florent Poinsard --- changelog/21.0/21.0.0/release_notes.md | 10 ++++++++++ changelog/21.0/21.0.0/summary.md | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/changelog/21.0/21.0.0/release_notes.md b/changelog/21.0/21.0.0/release_notes.md index a97adc5ad64..59052ec5166 100644 --- a/changelog/21.0/21.0.0/release_notes.md +++ b/changelog/21.0/21.0.0/release_notes.md @@ -3,6 +3,8 @@ ### Table of Contents +- **[Known Issue](#known-issues)** + - **[Backup reports itself as successful despite failures](#backup-reports-as-successful)** - **[Major Changes](#major-changes)** - **[Deprecations and Deletions](#deprecations-and-deletions)** - [Deprecated VTTablet Flags](#vttablet-flags) @@ -26,6 +28,14 @@ - **[vtctldclient ChangeTabletTags](#vtctldclient-changetablettags)** - **[Support for specifying expected primary in reparents](#reparents-expectedprimary)** +## Known Issue + +### Backup reports itself as successful despite failures + +In this release, we identified an issue where a backup may succeed even if a file fails to be backed up. +Leading to a successful backup, even if some errors occurred. +This only happen with the Builtin Backup Engine, and when all files have already been initiated in the backup process. +For more details, please refer to the related GitHub Issue https://github.com/vitessio/vitess/issues/17063. ## Major Changes diff --git a/changelog/21.0/21.0.0/summary.md b/changelog/21.0/21.0.0/summary.md index 9562c127952..8ab5273a9ab 100644 --- a/changelog/21.0/21.0.0/summary.md +++ b/changelog/21.0/21.0.0/summary.md @@ -2,6 +2,8 @@ ### Table of Contents +- **[Known Issue](#known-issues)** + - **[Backup reports itself as successful despite failures](#backup-reports-as-successful)** - **[Major Changes](#major-changes)** - **[Deprecations and Deletions](#deprecations-and-deletions)** - [Deprecated VTTablet Flags](#vttablet-flags) @@ -24,6 +26,14 @@ - **[vtctldclient ChangeTabletTags](#vtctldclient-changetablettags)** - **[Support for specifying expected primary in reparents](#reparents-expectedprimary)** +## Known Issue + +### Backup reports itself as successful despite failures + +In this release, we identified an issue where a backup may succeed even if a file fails to be backed up. +Leading to a successful backup, even if some errors occurred. +This only happen with the Builtin Backup Engine, and when all files have already been initiated in the backup process. +For more details, please refer to the related GitHub Issue https://github.com/vitessio/vitess/issues/17063. ## Major Changes From cfbeebab2e0ed5e68dc2085a4007581d813e9efc Mon Sep 17 00:00:00 2001 From: Florent Poinsard <35779988+frouioui@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:16:46 -0600 Subject: [PATCH 7/8] Update go/vt/mysqlctl/builtinbackupengine.go Co-authored-by: Matt Lord Signed-off-by: Florent Poinsard <35779988+frouioui@users.noreply.github.com> --- go/vt/mysqlctl/builtinbackupengine.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/go/vt/mysqlctl/builtinbackupengine.go b/go/vt/mysqlctl/builtinbackupengine.go index b9a9846d91e..f3cbe5364a0 100644 --- a/go/vt/mysqlctl/builtinbackupengine.go +++ b/go/vt/mysqlctl/builtinbackupengine.go @@ -643,8 +643,7 @@ func (be *BuiltinBackupEngine) backupFiles( // Backup the individual file. name := fmt.Sprintf("%v", i) - err := be.backupFile(ctxCancel, params, bh, fe, name) - if err != nil { + if err := be.backupFile(ctxCancel, params, bh, fe, name); err != nil { bh.RecordError(err) cancel() } From a84400b6ff9e75faad6052be654175aa0877ed74 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Fri, 25 Oct 2024 09:24:10 -0600 Subject: [PATCH 8/8] Apply review suggestions Signed-off-by: Florent Poinsard --- go/vt/mysqlctl/backup_blackbox_race_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go/vt/mysqlctl/backup_blackbox_race_test.go b/go/vt/mysqlctl/backup_blackbox_race_test.go index 4e81b74e7fe..5414ebc5fa6 100644 --- a/go/vt/mysqlctl/backup_blackbox_race_test.go +++ b/go/vt/mysqlctl/backup_blackbox_race_test.go @@ -26,7 +26,6 @@ import ( "testing" "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "vitess.io/vitess/go/mysql" @@ -42,7 +41,7 @@ import ( "vitess.io/vitess/go/vt/topo/memorytopo" ) -// This test triggers a certain code path that only happens a backup file fails to be backed up, +// This test triggers a certain code path that only happens when a backup file fails to be backed up, // only and only if, all the other backup files have either started or finished. When we reach // this scenario, files no longer try to acquire the semaphore and thus the backup cannot fail // because of context deadline when acquiring it. At this point, the only place where the backup @@ -57,7 +56,7 @@ import ( // 6. The restore must fail due to an error on file number 3 ("cannot add file: 3") // // This test is extracted into its own file that won't be run if we do 'go test -race' as this test -// exposes an old race condition that will be fixed soon after https://github.com/vitessio/vitess/pull/17062 +// exposes an old race condition that will be fixed after https://github.com/vitessio/vitess/pull/17062 // Link to the race condition issue: https://github.com/vitessio/vitess/issues/17065 func TestExecuteBackupWithFailureOnLastFile(t *testing.T) { ctx := utils.LeakCheckContext(t) @@ -86,7 +85,7 @@ func TestExecuteBackupWithFailureOnLastFile(t *testing.T) { } // Set up topo - keyspace, shard := "mykeyspace", "-80" + keyspace, shard := "mykeyspace", "-" ts := memorytopo.NewServer(ctx, "cell1") defer ts.Close() @@ -126,7 +125,8 @@ func TestExecuteBackupWithFailureOnLastFile(t *testing.T) { lastBackupFile := path.Join(backupRoot, "3") f, err := os.Create(lastBackupFile) require.NoError(t, err) - f.Write(make([]byte, 1024)) + _, err = f.Write(make([]byte, 1024)) + require.NoError(t, err) require.NoError(t, f.Chmod(0444)) require.NoError(t, f.Close()) @@ -148,5 +148,5 @@ func TestExecuteBackupWithFailureOnLastFile(t *testing.T) { }, bh) require.ErrorContains(t, err, "cannot add file: 3") - assert.Equal(t, mysqlctl.BackupUnusable, backupResult) + require.Equal(t, mysqlctl.BackupUnusable, backupResult) }