From ede8e8da4f572f908f0720fb1a452379a6ab3cf9 Mon Sep 17 00:00:00 2001 From: Drew Walters Date: Fri, 1 May 2020 20:51:19 +0000 Subject: [PATCH] Add state validation to Redfish shutdown/startup The Redfish client reports success when Redfish returns success after a shutdown or startup signal is sent; however, airshipctl needs to verify that the host successfully reaches the desired power state before moving on. This change adds the same wait functionality from reboot to the power on and power off functionality. Change-Id: I56c6696394b547f97da79cec56726002c3f225db Signed-off-by: Drew Walters --- pkg/remote/redfish/client.go | 38 +++----- pkg/remote/redfish/client_test.go | 140 +++++++++++++++++++++++++++++- pkg/remote/redfish/errors.go | 2 +- pkg/remote/redfish/utils.go | 29 +++++++ 4 files changed, 179 insertions(+), 30 deletions(-) diff --git a/pkg/remote/redfish/client.go b/pkg/remote/redfish/client.go index e28b852bd..26d3cfd0f 100644 --- a/pkg/remote/redfish/client.go +++ b/pkg/remote/redfish/client.go @@ -117,30 +117,6 @@ func (c *Client) EjectVirtualMedia(ctx context.Context) error { // RebootSystem power cycles a host by sending a shutdown signal followed by a power on signal. func (c *Client) RebootSystem(ctx context.Context) error { - waitForPowerState := func(desiredState redfishClient.PowerState) error { - // Check if number of retries is defined in context - totalRetries, ok := ctx.Value(ctxKeyNumRetries).(int) - if !ok { - totalRetries = systemActionRetries - } - - for retry := 0; retry <= totalRetries; retry++ { - system, httpResp, err := c.RedfishAPI.GetSystem(ctx, c.nodeID) - if err = ScreenRedfishError(httpResp, err); err != nil { - return err - } - if system.PowerState == desiredState { - log.Debugf("Node '%s' reached power state '%s'.", c.nodeID, desiredState) - return nil - } - c.Sleep(systemRebootDelay) - } - return ErrOperationRetriesExceeded{ - What: fmt.Sprintf("reboot system %s", c.nodeID), - Retries: totalRetries, - } - } - log.Debugf("Rebooting node '%s': powering off.", c.nodeID) resetReq := redfishClient.ResetRequestBody{} @@ -153,7 +129,7 @@ func (c *Client) RebootSystem(ctx context.Context) error { } // Check that node is powered off - if err = waitForPowerState(redfishClient.POWERSTATE_OFF); err != nil { + if err = c.waitForPowerState(ctx, redfishClient.POWERSTATE_OFF); err != nil { return err } @@ -168,7 +144,7 @@ func (c *Client) RebootSystem(ctx context.Context) error { } // Check that node is powered on and return - return waitForPowerState(redfishClient.POWERSTATE_ON) + return c.waitForPowerState(ctx, redfishClient.POWERSTATE_ON) } // SetBootSourceByType sets the boot source of the ephemeral node to one that's compatible with the boot @@ -246,8 +222,11 @@ func (c *Client) SystemPowerOff(ctx context.Context) error { resetReq.ResetType = redfishClient.RESETTYPE_FORCE_OFF _, httpResp, err := c.RedfishAPI.ResetSystem(ctx, c.nodeID, resetReq) + if err = ScreenRedfishError(httpResp, err); err != nil { + return err + } - return ScreenRedfishError(httpResp, err) + return c.waitForPowerState(ctx, redfishClient.POWERSTATE_OFF) } // SystemPowerOn powers on a host. @@ -256,8 +235,11 @@ func (c *Client) SystemPowerOn(ctx context.Context) error { resetReq.ResetType = redfishClient.RESETTYPE_ON _, httpResp, err := c.RedfishAPI.ResetSystem(ctx, c.nodeID, resetReq) + if err = ScreenRedfishError(httpResp, err); err != nil { + return err + } - return ScreenRedfishError(httpResp, err) + return c.waitForPowerState(ctx, redfishClient.POWERSTATE_ON) } // SystemPowerStatus retrieves the power status of a host as a human-readable string. diff --git a/pkg/remote/redfish/client_test.go b/pkg/remote/redfish/client_test.go index 7fdde54da..b24b88937 100644 --- a/pkg/remote/redfish/client_test.go +++ b/pkg/remote/redfish/client_test.go @@ -290,7 +290,7 @@ func TestRebootSystemTimeout(t *testing.T) { client.Sleep = func(_ time.Duration) {} err = client.RebootSystem(ctx) - assert.Equal(t, ErrOperationRetriesExceeded{What: "reboot system System.Embedded.1", Retries: 1}, err) + assert.Error(t, err) } func TestSetBootSourceByTypeGetSystemError(t *testing.T) { @@ -645,3 +645,141 @@ func TestSystemPowerStatusGetSystemError(t *testing.T) { _, err = client.SystemPowerStatus(ctx) assert.Error(t, err) } + +func TestWaitForPowerStateGetSystemFailed(t *testing.T) { + m := &redfishMocks.RedfishAPI{} + defer m.AssertExpectations(t) + + _, client, err := NewClient(redfishURL, false, false, "", "") + assert.NoError(t, err) + + ctx := context.WithValue(context.Background(), ctxKeyNumRetries, 1) + resetReq := redfishClient.ResetRequestBody{} + resetReq.ResetType = redfishClient.RESETTYPE_FORCE_OFF + + m.On("GetSystem", ctx, client.nodeID).Return( + redfishClient.ComputerSystem{}, &http.Response{StatusCode: 500}, nil) + + // Replace normal API client with mocked API client + client.RedfishAPI = m + + // Mock out the Sleep function so we don't have to wait on it + client.Sleep = func(_ time.Duration) {} + + err = client.waitForPowerState(ctx, redfishClient.POWERSTATE_OFF) + assert.Error(t, err) +} + +func TestWaitForPowerStateNoRetries(t *testing.T) { + m := &redfishMocks.RedfishAPI{} + defer m.AssertExpectations(t) + + _, client, err := NewClient(redfishURL, false, false, "", "") + assert.NoError(t, err) + + ctx := context.WithValue(context.Background(), ctxKeyNumRetries, 1) + resetReq := redfishClient.ResetRequestBody{} + resetReq.ResetType = redfishClient.RESETTYPE_FORCE_OFF + + m.On("GetSystem", ctx, client.nodeID).Return( + redfishClient.ComputerSystem{ + PowerState: redfishClient.POWERSTATE_OFF, + }, &http.Response{StatusCode: 200}, nil) + + // Replace normal API client with mocked API client + client.RedfishAPI = m + + // Mock out the Sleep function so we don't have to wait on it + client.Sleep = func(_ time.Duration) {} + + err = client.waitForPowerState(ctx, redfishClient.POWERSTATE_OFF) + assert.NoError(t, err) +} + +func TestWaitForPowerStateWithRetries(t *testing.T) { + m := &redfishMocks.RedfishAPI{} + defer m.AssertExpectations(t) + + _, client, err := NewClient(redfishURL, false, false, "", "") + assert.NoError(t, err) + + ctx := context.WithValue(context.Background(), ctxKeyNumRetries, 1) + resetReq := redfishClient.ResetRequestBody{} + resetReq.ResetType = redfishClient.RESETTYPE_FORCE_OFF + + m.On("GetSystem", ctx, client.nodeID).Return( + redfishClient.ComputerSystem{ + PowerState: redfishClient.POWERSTATE_ON, + }, &http.Response{StatusCode: 200}, nil).Times(1) + + m.On("GetSystem", ctx, client.nodeID).Return( + redfishClient.ComputerSystem{ + PowerState: redfishClient.POWERSTATE_OFF, + }, &http.Response{StatusCode: 200}, nil).Times(1) + + // Replace normal API client with mocked API client + client.RedfishAPI = m + + // Mock out the Sleep function so we don't have to wait on it + client.Sleep = func(_ time.Duration) {} + + err = client.waitForPowerState(ctx, redfishClient.POWERSTATE_OFF) + assert.NoError(t, err) +} + +func TestWaitForPowerStateRetriesExceeded(t *testing.T) { + m := &redfishMocks.RedfishAPI{} + defer m.AssertExpectations(t) + + _, client, err := NewClient(redfishURL, false, false, "", "") + assert.NoError(t, err) + + ctx := context.WithValue(context.Background(), ctxKeyNumRetries, 1) + resetReq := redfishClient.ResetRequestBody{} + resetReq.ResetType = redfishClient.RESETTYPE_FORCE_OFF + + m.On("GetSystem", ctx, client.nodeID).Return( + redfishClient.ComputerSystem{ + PowerState: redfishClient.POWERSTATE_ON, + }, &http.Response{StatusCode: 200}, nil) + + m.On("GetSystem", ctx, client.nodeID).Return( + redfishClient.ComputerSystem{ + PowerState: redfishClient.POWERSTATE_ON, + }, &http.Response{StatusCode: 200}, nil) + + // Replace normal API client with mocked API client + client.RedfishAPI = m + + // Mock out the Sleep function so we don't have to wait on it + client.Sleep = func(_ time.Duration) {} + + err = client.waitForPowerState(ctx, redfishClient.POWERSTATE_OFF) + assert.Error(t, err) +} + +func TestWaitForPowerStateDifferentPowerState(t *testing.T) { + m := &redfishMocks.RedfishAPI{} + defer m.AssertExpectations(t) + + _, client, err := NewClient(redfishURL, false, false, "", "") + assert.NoError(t, err) + + ctx := context.WithValue(context.Background(), ctxKeyNumRetries, 1) + resetReq := redfishClient.ResetRequestBody{} + resetReq.ResetType = redfishClient.RESETTYPE_FORCE_ON + + m.On("GetSystem", ctx, client.nodeID).Return( + redfishClient.ComputerSystem{ + PowerState: redfishClient.POWERSTATE_ON, + }, &http.Response{StatusCode: 200}, nil) + + // Replace normal API client with mocked API client + client.RedfishAPI = m + + // Mock out the Sleep function so we don't have to wait on it + client.Sleep = func(_ time.Duration) {} + + err = client.waitForPowerState(ctx, redfishClient.POWERSTATE_ON) + assert.NoError(t, err) +} diff --git a/pkg/remote/redfish/errors.go b/pkg/remote/redfish/errors.go index 01c26a9d9..183bc568f 100644 --- a/pkg/remote/redfish/errors.go +++ b/pkg/remote/redfish/errors.go @@ -46,7 +46,7 @@ type ErrOperationRetriesExceeded struct { } func (e ErrOperationRetriesExceeded) Error() string { - return fmt.Sprintf("operation %s failed. Maximum retries (%d) exceeded", e.What, e.Retries) + return fmt.Sprintf("Unable to %s. Maximum retries (%d) exceeded.", e.What, e.Retries) } // ErrUnrecognizedRedfishResponse is a debug error that describes unexpected formats in a Redfish error response. diff --git a/pkg/remote/redfish/utils.go b/pkg/remote/redfish/utils.go index 03ac6139e..0cea15eef 100644 --- a/pkg/remote/redfish/utils.go +++ b/pkg/remote/redfish/utils.go @@ -251,3 +251,32 @@ func getBasePath(redfishURL string) (string, error) { return baseURL, nil } + +func (c Client) waitForPowerState(ctx context.Context, desiredState redfishClient.PowerState) error { + log.Debugf("Waiting for node '%s' to reach power state '%s'.", c.nodeID, desiredState) + + // Check if number of retries is defined in context + totalRetries, ok := ctx.Value(ctxKeyNumRetries).(int) + if !ok { + totalRetries = systemActionRetries + } + + for retry := 0; retry <= totalRetries; retry++ { + system, httpResp, err := c.RedfishAPI.GetSystem(ctx, c.NodeID()) + if err = ScreenRedfishError(httpResp, err); err != nil { + return err + } + + if system.PowerState == desiredState { + log.Debugf("Node '%s' reached power state '%s'.", c.nodeID, desiredState) + return nil + } + + c.Sleep(systemRebootDelay) + } + + return ErrOperationRetriesExceeded{ + What: fmt.Sprintf("reach desired power state %s", desiredState), + Retries: totalRetries, + } +}