Skip to content

Commit

Permalink
Fix error message for planned reparent shard (#15529)
Browse files Browse the repository at this point in the history
Signed-off-by: Manan Gupta <[email protected]>
  • Loading branch information
GuptaManan100 authored Mar 21, 2024
1 parent 3d313b9 commit ae066f6
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 21 deletions.
10 changes: 9 additions & 1 deletion go/vt/vtctl/reparentutil/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package reparentutil
import (
"context"
"fmt"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -88,18 +89,23 @@ func ElectNewPrimary(

// candidates are the list of tablets that can be potentially promoted after filtering out based on preliminary checks.
candidates := []*topodatapb.Tablet{}
reasonsToInvalidate := strings.Builder{}
for _, tablet := range tabletMap {
switch {
case newPrimaryAlias != nil:
// If newPrimaryAlias is provided, then that is the only valid tablet, even if it is not of type replica or in a different cell.
if !topoproto.TabletAliasEqual(tablet.Alias, newPrimaryAlias) {
reasonsToInvalidate.WriteString(fmt.Sprintf("\n%v does not match the new primary alias provided", topoproto.TabletAliasString(tablet.Alias)))
continue
}
case primaryCell != "" && tablet.Alias.Cell != primaryCell:
reasonsToInvalidate.WriteString(fmt.Sprintf("\n%v is not in the same cell as the previous primary", topoproto.TabletAliasString(tablet.Alias)))
continue
case avoidPrimaryAlias != nil && topoproto.TabletAliasEqual(tablet.Alias, avoidPrimaryAlias):
reasonsToInvalidate.WriteString(fmt.Sprintf("\n%v matches the primary alias to avoid", topoproto.TabletAliasString(tablet.Alias)))
continue
case tablet.Tablet.Type != topodatapb.TabletType_REPLICA:
reasonsToInvalidate.WriteString(fmt.Sprintf("\n%v is not a replica", topoproto.TabletAliasString(tablet.Alias)))
continue
}

Expand All @@ -124,6 +130,8 @@ func ElectNewPrimary(
if err == nil && (tolerableReplLag == 0 || tolerableReplLag >= replLag) {
validTablets = append(validTablets, tb)
tabletPositions = append(tabletPositions, pos)
} else {
reasonsToInvalidate.WriteString(fmt.Sprintf("\n%v has %v replication lag which is more than the tolerable amount", topoproto.TabletAliasString(tablet.Alias), replLag))
}
return err
})
Expand All @@ -136,7 +144,7 @@ func ElectNewPrimary(

// return an error if there are no valid tablets available
if len(validTablets) == 0 {
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "cannot find a tablet to reparent to in the same cell as the current primary")
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "cannot find a tablet to reparent to%v", reasonsToInvalidate.String())
}

// sort the tablets for finding the best primary
Expand Down
58 changes: 38 additions & 20 deletions go/vt/vtctl/reparentutil/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func TestElectNewPrimary(t *testing.T) {
avoidPrimaryAlias *topodatapb.TabletAlias
tolerableReplLag time.Duration
expected *topodatapb.TabletAlias
shouldErr bool
errContains []string
}{
{
name: "found a replica",
Expand Down Expand Up @@ -135,7 +135,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 102,
},
shouldErr: false,
errContains: nil,
},
{
name: "new primary alias provided - no tolerable replication lag",
Expand Down Expand Up @@ -174,7 +174,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 101,
},
shouldErr: false,
errContains: nil,
},
{
name: "new primary alias provided - with tolerable replication lag",
Expand Down Expand Up @@ -231,7 +231,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 102,
},
shouldErr: false,
errContains: nil,
},
{
name: "new primary alias provided - with intolerable replication lag",
Expand Down Expand Up @@ -284,8 +284,13 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 102,
},
expected: nil,
shouldErr: true,
expected: nil,
errContains: []string{
`cannot find a tablet to reparent to`,
`zone1-0000000100 does not match the new primary alias provided`,
`zone1-0000000101 does not match the new primary alias provided`,
`zone1-0000000102 has 1m40s replication lag which is more than the tolerable amount`,
},
},
{
name: "found a replica ignoring replica lag",
Expand Down Expand Up @@ -345,7 +350,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 102,
},
shouldErr: false,
errContains: nil,
},
{
name: "found a replica - ignore one with replication lag",
Expand Down Expand Up @@ -405,7 +410,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 101,
},
shouldErr: false,
errContains: nil,
},
{
name: "found a replica - more advanced relay log position",
Expand Down Expand Up @@ -465,7 +470,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 102,
},
shouldErr: false,
errContains: nil,
},
{
name: "no active primary in shard",
Expand Down Expand Up @@ -505,7 +510,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 101,
},
shouldErr: false,
errContains: nil,
},
{
name: "avoid primary alias is nil",
Expand Down Expand Up @@ -547,7 +552,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 101,
},
shouldErr: false,
errContains: nil,
}, {
name: "avoid primary alias and shard primary are nil",
tmc: &chooseNewPrimaryTestTMClient{
Expand Down Expand Up @@ -586,7 +591,7 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 101,
},
shouldErr: false,
errContains: nil,
},
{
name: "no replicas in primary cell",
Expand Down Expand Up @@ -640,8 +645,13 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 0,
},
expected: nil,
shouldErr: true,
expected: nil,
errContains: []string{
`cannot find a tablet to reparent to`,
`zone2-0000000200 is not a replica`,
`zone1-0000000101 is not in the same cell as the previous primary`,
`zone1-0000000102 is not in the same cell as the previous primary`,
},
},
{
name: "only available tablet is AvoidPrimary",
Expand Down Expand Up @@ -677,8 +687,11 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 101,
},
expected: nil,
shouldErr: true,
expected: nil,
errContains: []string{
`cannot find a tablet to reparent to
zone1-0000000101 matches the primary alias to avoid`,
},
},
{
name: "no replicas in shard",
Expand All @@ -704,8 +717,11 @@ func TestElectNewPrimary(t *testing.T) {
Cell: "zone1",
Uid: 0,
},
expected: nil,
shouldErr: true,
expected: nil,
errContains: []string{
`cannot find a tablet to reparent to
zone1-0000000100 is not a replica`,
},
},
}

Expand All @@ -716,8 +732,10 @@ func TestElectNewPrimary(t *testing.T) {
t.Parallel()

actual, err := ElectNewPrimary(ctx, tt.tmc, tt.shardInfo, tt.tabletMap, tt.newPrimaryAlias, tt.avoidPrimaryAlias, time.Millisecond*50, tt.tolerableReplLag, durability, logger)
if tt.shouldErr {
assert.Error(t, err)
if len(tt.errContains) > 0 {
for _, errC := range tt.errContains {
assert.ErrorContains(t, err, errC)
}
return
}

Expand Down

0 comments on commit ae066f6

Please sign in to comment.