Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reboot operation #11

Merged
merged 7 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions system/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (

spb "github.com/openconfig/gnoi/system"
tpb "github.com/openconfig/gnoi/types"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"github.com/openconfig/gnoigo/internal"
)
Expand Down Expand Up @@ -151,6 +153,106 @@ func (p *PingOperation) Execute(ctx context.Context, c *internal.Clients) ([]*sp
}
}

// RebootOperation represents the parameters of a Reboot operation.
type RebootOperation struct {
rebootMethod spb.RebootMethod
delay time.Duration
message string
subcomponents []*tpb.Path
force bool
ignoreUnavailableErr bool
waitForActive bool
}

// NewRebootOperation creates an empty RebootOperation.
func NewRebootOperation() *RebootOperation {
return &RebootOperation{}
}

// RebootMethod specifies method to reboot.
func (r *RebootOperation) RebootMethod(rebootMethod spb.RebootMethod) *RebootOperation {
r.rebootMethod = rebootMethod
return r
}

// Delay specifies time in nanoseconds to wait before issuing reboot.
func (r *RebootOperation) Delay(delay time.Duration) *RebootOperation {
r.delay = delay
return r
}

// Message specifies informational reason for the reboot or cancel reboot.
func (r *RebootOperation) Message(message string) *RebootOperation {
r.message = message
return r
}

// Subcomponents specifies the sub-components to reboot.
func (r *RebootOperation) Subcomponents(subcomponents []*tpb.Path) *RebootOperation {
r.subcomponents = subcomponents
return r
}

// Force reboot if sanity checks fail.
func (r *RebootOperation) Force(force bool) *RebootOperation {
r.force = force
return r
}

// IgnoreUnavailableErr ignores unavailable errors returned by reboot status.
func (r *RebootOperation) IgnoreUnavailableErr(ignoreUnavailableErr bool) *RebootOperation {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we just drop this and always ignore UnavailableErrors when waiting for active

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm.. is UnavailableError always acceptable after reboot?

Having a function to ignore the error gives the flexibility to ignore errors based on the usecase.

I do see a test here which performs a reboot and expects error to be nil -> https://github.com/openconfig/featureprofiles/blob/main/feature/gnoi/system/tests/chassis_reboot_status_and_cancel_test/chassis_reboot_status_and_cancel_test.go#L168

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I see the pros and cons. Three comments:

First, It just seems to me that the likelihood that we would see an unexpected Unavailable error it is very low, especially given that the prior RebootRequest must have succeeded by then. Other errors would obviously still be reported.

Second, almost all uses of WaitForActive(true) is going to want to be accompanied by IgnoreUnavailableErr(true), because nearly all will be rebooting the chassis, and if not that the management linecard. So I feel like IgnoreUnavailableErr(true) is going to be one of these lines people just copy and paste everywhere anyway, regardless of whether they need it, because that's what most calls look like.

Third, I'm holding out some hope that we can extend the RebootRequestResponse to make IgnoreUnavailableErr eventually unnecessary. I'm going to be starting a thread on the OpenConfig chat about that.

Due to those three, I was feeling like IgnoreUnavailableErr maybe isn't offering enough to pull its weight. All that said, I still see the reasons for including it, so I'm fine with you leaving it in if you prefer to keep it.

r.ignoreUnavailableErr = ignoreUnavailableErr
return r
}

// WaitForActive waits until RebootResponse returns active.
func (r *RebootOperation) WaitForActive(waitForActive bool) *RebootOperation {
r.waitForActive = waitForActive
return r
}

// Execute performs the Reboot operation and will wait for rebootStatus to be active if waitForActive is set to true.
func (r *RebootOperation) Execute(ctx context.Context, c *internal.Clients) (*spb.RebootResponse, error) {
resp, err := c.System().Reboot(ctx, &spb.RebootRequest{
Method: r.rebootMethod,
Delay: uint64(r.delay.Nanoseconds()),
Message: r.message,
Subcomponents: r.subcomponents,
Force: r.force,
})

if err != nil {
return nil, err
}

if r.waitForActive {
for {
rebootStatus, statusErr := c.System().RebootStatus(ctx, &spb.RebootStatusRequest{Subcomponents: r.subcomponents})
var waitTime time.Duration

switch {
case status.Code(statusErr) == codes.Unavailable && r.ignoreUnavailableErr:
waitTime = 10 * time.Second
case statusErr != nil:
return nil, statusErr
case rebootStatus.GetActive():
return resp, nil
default:
waitTime = time.Duration(rebootStatus.GetWait()) * time.Second
}

select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(waitTime):
continue
}
}
}

return resp, nil
}

// SwitchControlProcessorOperation represents the parameters of a SwitchControlProcessor operation.
type SwitchControlProcessorOperation struct {
req *spb.SwitchControlProcessorRequest
Expand Down
164 changes: 141 additions & 23 deletions system/system_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"testing"

"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/testing/protocmp"

"github.com/google/go-cmp/cmp"
Expand All @@ -35,6 +37,8 @@ type fakeSystemClient struct {
spb.SystemClient
KillProcessFn func(context.Context, *spb.KillProcessRequest, ...grpc.CallOption) (*spb.KillProcessResponse, error)
PingFn func(context.Context, *spb.PingRequest, ...grpc.CallOption) (spb.System_PingClient, error)
RebootFn func(context.Context, *spb.RebootRequest, ...grpc.CallOption) (*spb.RebootResponse, error)
RebootStatusFn func(context.Context, *spb.RebootStatusRequest, ...grpc.CallOption) (*spb.RebootStatusResponse, error)
SwitchControlProcessorFn func(context.Context, *spb.SwitchControlProcessorRequest, ...grpc.CallOption) (*spb.SwitchControlProcessorResponse, error)
TimeFn func(context.Context, *spb.TimeRequest, ...grpc.CallOption) (*spb.TimeResponse, error)
TracerouteFn func(context.Context, *spb.TracerouteRequest, ...grpc.CallOption) (spb.System_TracerouteClient, error)
Expand All @@ -56,27 +60,20 @@ func (fg *fakeSystemClient) SwitchControlProcessor(ctx context.Context, in *spb.
return fg.SwitchControlProcessorFn(ctx, in, opts...)
}

func (fg *fakeSystemClient) Time(ctx context.Context, in *spb.TimeRequest, opts ...grpc.CallOption) (*spb.TimeResponse, error) {
return fg.TimeFn(ctx, in, opts...)
func (fg *fakeSystemClient) Reboot(ctx context.Context, in *spb.RebootRequest, opts ...grpc.CallOption) (*spb.RebootResponse, error) {
return fg.RebootFn(ctx, in, opts...)
}

func (fg *fakeSystemClient) Traceroute(ctx context.Context, in *spb.TracerouteRequest, opts ...grpc.CallOption) (spb.System_TracerouteClient, error) {
return fg.TracerouteFn(ctx, in, opts...)
func (fg *fakeSystemClient) RebootStatus(ctx context.Context, in *spb.RebootStatusRequest, opts ...grpc.CallOption) (*spb.RebootStatusResponse, error) {
return fg.RebootStatusFn(ctx, in, opts...)
}

type fakePingClient struct {
spb.System_PingClient
resp []*spb.PingResponse
err error
func (fg *fakeSystemClient) Time(ctx context.Context, in *spb.TimeRequest, opts ...grpc.CallOption) (*spb.TimeResponse, error) {
return fg.TimeFn(ctx, in, opts...)
}

func (pc *fakePingClient) Recv() (*spb.PingResponse, error) {
if len(pc.resp) == 0 && pc.err == nil {
return nil, io.EOF
}
resp := pc.resp[0]
pc.resp = pc.resp[1:]
return resp, pc.err
func (fg *fakeSystemClient) Traceroute(ctx context.Context, in *spb.TracerouteRequest, opts ...grpc.CallOption) (spb.System_TracerouteClient, error) {
return fg.TracerouteFn(ctx, in, opts...)
}

func TestKillProcess(t *testing.T) {
Expand Down Expand Up @@ -118,19 +115,19 @@ func TestKillProcess(t *testing.T) {
}
}

type fakeTracerouteClient struct {
spb.System_TracerouteClient
resp []*spb.TracerouteResponse
type fakePingClient struct {
spb.System_PingClient
resp []*spb.PingResponse
err error
}

func (tc *fakeTracerouteClient) Recv() (*spb.TracerouteResponse, error) {
if len(tc.resp) == 0 && tc.err == nil {
func (pc *fakePingClient) Recv() (*spb.PingResponse, error) {
if len(pc.resp) == 0 && pc.err == nil {
return nil, io.EOF
}
resp := tc.resp[0]
tc.resp = tc.resp[1:]
return resp, tc.err
resp := pc.resp[0]
pc.resp = pc.resp[1:]
return resp, pc.err
}

func TestPing(t *testing.T) {
Expand Down Expand Up @@ -182,6 +179,112 @@ func TestPing(t *testing.T) {
}
}

func TestReboot(t *testing.T) {
tests := []struct {
desc string
op *system.RebootOperation
want *spb.RebootResponse
rebootErr, wantErr string
statusErrs []error
statusResps []*spb.RebootStatusResponse
cancelContext bool
}{
{
desc: "Test reboot",
op: system.NewRebootOperation().RebootMethod(spb.RebootMethod_COLD).Subcomponents([]*tpb.Path{{
Elem: []*tpb.PathElem{
{Name: "components"},
{Name: "component", Key: map[string]string{"name": "RP0"}},
},
}}),
want: &spb.RebootResponse{},
},
{
desc: "Test reboot wait for active status",
op: system.NewRebootOperation().RebootMethod(spb.RebootMethod_COLD).WaitForActive(true),
statusResps: []*spb.RebootStatusResponse{{Active: true}},
want: &spb.RebootResponse{},
},
{
desc: "Test reboot wait for active status and ignore unavailable error",
op: system.NewRebootOperation().RebootMethod(spb.RebootMethod_COLD).WaitForActive(true).IgnoreUnavailableErr(true),
statusErrs: []error{status.Errorf(codes.Unavailable, "unavailable")},
statusResps: []*spb.RebootStatusResponse{{Active: true}},
want: &spb.RebootResponse{},
},
{
desc: "Test reboot with non active status response",
op: system.NewRebootOperation().RebootMethod(spb.RebootMethod_COLD).WaitForActive(true),
statusResps: []*spb.RebootStatusResponse{{Active: false, Wait: 2}, {Active: true}},
want: &spb.RebootResponse{},
},
{
desc: "Test reboot wait for active status returns unknown error",
op: system.NewRebootOperation().RebootMethod(spb.RebootMethod_COLD).WaitForActive(true).IgnoreUnavailableErr(true),
statusErrs: []error{status.Errorf(codes.Unknown, "unknown")},
wantErr: "unknown",
},
{
desc: "Test reboot returns error on reboot",
op: system.NewRebootOperation().RebootMethod(spb.RebootMethod_COLD),
rebootErr: "Reboot operation error",
wantErr: "Reboot operation error",
},
{
desc: "Test reboot returns error on reboot status",
op: system.NewRebootOperation().RebootMethod(spb.RebootMethod_COLD).WaitForActive(true),
statusErrs: []error{status.Errorf(codes.Unavailable, "unavailable")},
wantErr: "unavailable",
},
{
desc: "Test reboot with context cancel",
op: system.NewRebootOperation().RebootMethod(spb.RebootMethod_COLD).WaitForActive(true),
statusResps: []*spb.RebootStatusResponse{{Wait: 20}},
wantErr: "context",
cancelContext: true,
},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
var fakeClient internal.Clients
fakeClient.SystemClient = &fakeSystemClient{
RebootFn: func(context.Context, *spb.RebootRequest, ...grpc.CallOption) (*spb.RebootResponse, error) {
if tt.rebootErr != "" {
return nil, fmt.Errorf(tt.rebootErr)
}
return tt.want, nil
},
RebootStatusFn: func(context.Context, *spb.RebootStatusRequest, ...grpc.CallOption) (*spb.RebootStatusResponse, error) {
if len(tt.statusErrs) > 0 {
statusErr := tt.statusErrs[0]
tt.statusErrs = tt.statusErrs[1:]
return nil, statusErr
}
if len(tt.statusResps) > 0 {
statusResp := tt.statusResps[0]
tt.statusResps = tt.statusResps[1:]
return statusResp, nil
}
return &spb.RebootStatusResponse{}, nil
}}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if tt.cancelContext {
cancel()
}

got, gotErr := tt.op.Execute(ctx, &fakeClient)
if (gotErr == nil) != (tt.wantErr == "") || (gotErr != nil && !strings.Contains(gotErr.Error(), tt.wantErr)) {
t.Errorf("Execute() got unexpected error %v want %s", gotErr, tt.wantErr)
}
if tt.want != got {
t.Errorf("Execute() got unexpected response want %v got %v", tt.want, got)
}
})
}
}

func TestSwitchControlProcessor(t *testing.T) {
tests := []struct {
desc string
Expand Down Expand Up @@ -281,6 +384,21 @@ func TestTime(t *testing.T) {
}
}

type fakeTracerouteClient struct {
spb.System_TracerouteClient
resp []*spb.TracerouteResponse
err error
}

func (tc *fakeTracerouteClient) Recv() (*spb.TracerouteResponse, error) {
if len(tc.resp) == 0 && tc.err == nil {
return nil, io.EOF
}
resp := tc.resp[0]
tc.resp = tc.resp[1:]
return resp, tc.err
}

func TestTraceroute(t *testing.T) {
tests := []struct {
desc string
Expand Down