metal/mtce/src/maintenance/mtcBmcUtil.cpp
Eric MacDonald df9343b0cc Add redfish power/reset/reinstall bmc support to maintenance
This update delivers redfish support for Power-On/Off, Reset
and Netboot Reinstall handling to maintenance.

Test Plan: (Testing Continues)

PASS: Verify Redfish Power-Off action handling
PASS: Verify Redfish Power-On action handling
PASS: Verify Redfish Reset action handling
PASS: Verify compute Redfish Reinstall action handling from controller-0
PASS: Verify compute Redfish Reinstall action handling from controller-1
PASS: Verify Redfish Power-Off Action failure handling
PASS: Verify Redfish Power-On action failure handling
PASS: Verify Redfish Reset action failure handling
PASS: Verify Redfish Re-Install action failure handling
PASS: verify Reset progression cycle does not leak memory.
PASS: Verify bmc_handler failure handling does not leak memory.
PASS: Verify Inservice BMC access (ping) failure and recovery handling.
PASS: Verify BMC access failure alarm handling
PASS: Verify BMC provisioning and deprovisioning soak (redfish - wolfpass)
PASS: Verify BMC provisioning and deprovisioning does not leak memory.
PASS: Verify BMC provisioning handling with bad ip and/or bad username
PASS: Verify BMC reprovisioning to same protocol
PASS: Verify BMC reprovisioning from ipmi host to redfish host
PASS: Verify BMC reprovisioning from redfish host to ipmi host
PASS: Verify mixed protocol support in same lab
PASS: Verify mixed server support in same lab
PASS: Verify Large System Install with BMCs provisioned (wp8-12)
PASS: Verify bmc access method (learn,ipmi,redfish) learned from mtc.init
PASS: Verify Swact with BMCs provisioned.
PASS: Verify no segfaults.
PASS: Verify AIO System Install in lab that supports redfish (WC3-6, WP8-12, Dell 720 3-7)
PASS: Verify AIO Simplex Install with Redfish Support (SM1, SM3)
PASS: Verify AIO Duplex Install with Redfish Support (SM 5-6, Dell 720 1-2

Useability:

PASS: Verify handling of reprovisioning BMC between hosts that support
             different protocols.
PASS: Verify handling of reprovisioning ip address to host that leads to a
             different protocol select.
PASS: Verify manual relearn handling to recover from errors that result from
             the above case.
PASS: Verify host BMC deprovisioning handling and cleanup.
PASS: Verify sensor monitoring.
PASS: Verify fault insertion for both protocols and action handling.
PASS: Verify protocol select handover.
PASS: Verify hwmond sticks with a selected protocol once a sensor model
             has been created using that protocol.
PASS: Verify handling of missing bmc_access_method configuration select.
PASS: Verify inservice bmc_access_method service parameter modification handling.

Regression:

PASS: Verify redfish BMC info query logging.
PASS: Verify sensor monitoring and alarming still works.
PASS: Verify all power/reset/netboot commands for IPMI
PASS: Verify reprovisioning soak of Wolfpass servers
PASS: Verify reprovisioning soak of SM servers

Depends-on: https://review.opendev.org/#/c/679178/
Change-Id: I984057e04d7426e37d675cf4d334a4e35419f2e8
Story: 2005861
Task: 35826
Task: 36606
Task: 36467
Task: 36456
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
2019-09-26 15:59:35 -04:00

408 lines
16 KiB
C++

/*
* Copyright (c) 2017 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
*
*
* @file
* Wind River Titanium Cloud Maintenance BMC Utilities
*/
#include <stdio.h>
#include <iostream>
#include <string.h>
using namespace std;
#include "nodeBase.h" /* for ... mtce common definitions */
#include "nodeClass.h" /* for ... */
#include "bmcUtil.h" /* for ... mtce-common bmc utility header */
/*****************************************************************************
*
* Name : bmc_command_send
*
* Description: This utility starts the bmc command handling thread
* with the specified command.
*
* Returns : PASS if all the pre-start semantic checks pass and the
* thread was started.
*
* Otherwise the thread was not started and some non zero
* FAIL_xxxx code is returned after a representative design
* log is generated.
*
*****************************************************************************/
int nodeLinkClass::bmc_command_send ( struct nodeLinkClass::node * node_ptr,
int command )
{
int rc = PASS ;
node_ptr->bmc_thread_info.command = command ;
/* Update / Setup the BMC access credentials */
node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ;
node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ;
node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw ;
/* Special case handling for Redfish Root (BMC) Query command.
* Current protocol override for this command that only applies
* to redfish and used for the bmc protocol learning process. */
if ( command == BMC_THREAD_CMD__BMC_QUERY )
node_ptr->bmc_thread_info.proto = BMC_PROTOCOL__REDFISHTOOL ;
else
node_ptr->bmc_thread_info.proto = node_ptr->bmc_protocol ;
if ( node_ptr->bmc_thread_info.proto == BMC_PROTOCOL__REDFISHTOOL )
{
/* build the reset/power control command */
switch (command)
{
case BMC_THREAD_CMD__POWER_RESET:
node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ;
node_ptr->bm_cmd.append(node_ptr->bmc_info.reset_action_list.front());
break ;
case BMC_THREAD_CMD__POWER_ON:
node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ;
node_ptr->bm_cmd.append(node_ptr->bmc_info.power_on_action_list.front());
break ;
case BMC_THREAD_CMD__POWER_OFF:
node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ;
node_ptr->bm_cmd.append(node_ptr->bmc_info.power_off_action_list.front());
break ;
}
node_ptr->thread_extra_info.bm_cmd = node_ptr->bm_cmd ;
}
#ifdef WANT_FIT_TESTING
{
bool want_fit = false ;
int fit = FIT_CODE__BMC_COMMAND_SEND ;
if ( daemon_want_fit ( fit, node_ptr->hostname, "root_query" ) == true )
{
want_fit = true ;
}
else if ( daemon_want_fit ( fit, node_ptr->hostname, "bmc_info" ) == true )
{
want_fit = true ;
}
else if (( command == BMC_THREAD_CMD__POWER_STATUS ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "power_status" ) == true ))
{
want_fit = true ;
}
else if ( daemon_want_fit ( fit, node_ptr->hostname, "reset_cause" ) == true )
{
want_fit = true ;
}
else if (( command == BMC_THREAD_CMD__POWER_RESET ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "reset" ) == true ))
{
want_fit = true ;
}
else if (( command == BMC_THREAD_CMD__POWER_ON ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "power_on" ) == true ))
{
want_fit = true ;
}
else if (( command == BMC_THREAD_CMD__POWER_OFF ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "power_off" ) == true ))
{
want_fit = true ;
}
else if (( command == BMC_THREAD_CMD__POWER_CYCLE ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "power_cycle" ) == true ))
{
want_fit = true ;
}
else if (( command == BMC_THREAD_CMD__BOOTDEV_PXE ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "netboot_pxe" ) == true ))
{
want_fit = true ;
}
if ( want_fit == true )
{
slog ("%s FIT %s\n", node_ptr->hostname.c_str(), bmcUtil_getCmd_str(command).c_str() );
node_ptr->bmc_thread_info.status = node_ptr->bmc_thread_ctrl.status = rc = FAIL_FIT ;
node_ptr->bmc_thread_info.status_string = "bmc_command_send fault insertion failure" ;
return ( rc );
}
}
#endif
if (( hostUtil_is_valid_ip_addr ( node_ptr->thread_extra_info.bm_ip ) == true ) &&
( !node_ptr->thread_extra_info.bm_un.empty() ) &&
( !node_ptr->thread_extra_info.bm_pw.empty ()))
{
node_ptr->bmc_thread_ctrl.status = rc =
thread_launch ( node_ptr->bmc_thread_ctrl,
node_ptr->bmc_thread_info ) ;
if ( rc != PASS )
{
elog ("%s failed to launch power control thread (rc:%d)\n",
node_ptr->hostname.c_str(), rc );
}
else
{
blog ("%s %s thread launched with the '%s' command\n",
node_ptr->hostname.c_str(),
node_ptr->bmc_thread_ctrl.name.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str());
}
node_ptr->bmc_thread_ctrl.retries = 0 ;
}
else
{
node_ptr->bmc_thread_ctrl.status = rc =
node_ptr->bmc_thread_info.status = FAIL_INVALID_DATA ;
node_ptr->bmc_thread_info.status_string = "one or more bmc credentials are invalid" ;
wlog ("%s %s %s %s\n", node_ptr->hostname.c_str(),
hostUtil_is_valid_ip_addr (
node_ptr->thread_extra_info.bm_ip ) ? "" : "bm_ip:invalid",
node_ptr->thread_extra_info.bm_un.empty() ? "bm_un:empty" : "",
node_ptr->thread_extra_info.bm_pw.empty() ? "bm_pw:empty" : "");
}
return (rc);
}
/*****************************************************************************
*
* Name : bmc_command_recv
*
* Description: This utility will check for bmc command thread completion.
*
* Returns : PASS is returned if the thread reports done.
* RETRY is returned if the thread has not completed.
* FAIL_RETRY is returned after 10 back-to-back calls return RETRY.
*
*****************************************************************************/
int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr )
{
int rc = RETRY ;
/* check for 'thread done' completion */
if ( thread_done( node_ptr->bmc_thread_ctrl ) == true )
{
if ( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL )
{
if (( rc = node_ptr->bmc_thread_info.status ) != PASS )
{
/* handle the redfishtool root query as a special case because
* it is likely to fail and we don't want un-necessary error logs */
if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY ) &&
(( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE )))
{
blog ("%s bmc redfish %s failed",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str());
}
else if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_INFO ) &&
(( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE )))
{
wlog ("%s bmc redfish %s failed",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str());
}
else
{
elog ("%s bmc redfish %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(),
node_ptr->bmc_thread_info.data.c_str(),
rc,
node_ptr->bmc_thread_info.status,
node_ptr->bmc_thread_info.status_string.c_str());
}
}
else
{
rc = PASS ;
}
}
else /* default is ipmi */
{
if (( rc = node_ptr->bmc_thread_info.status ) != PASS )
{
/* Don't log an error if this is just the BMC Query failure
* used for protocol learning */
if ( node_ptr->bmc_thread_info.command != BMC_THREAD_CMD__BMC_QUERY )
{
elog ("%s %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(),
node_ptr->bmc_thread_info.data.c_str(),
rc,
node_ptr->bmc_thread_info.status,
node_ptr->bmc_thread_info.status_string.c_str());
}
}
else
{
if ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__POWER_RESET )
{
if ( node_ptr->bmc_thread_info.data.find(IPMITOOL_POWER_RESET_RESP) == std::string::npos )
rc = FAIL_RESET_CONTROL ;
}
else if ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__POWER_OFF )
{
if ( node_ptr->bmc_thread_info.data.find(IPMITOOL_POWER_OFF_RESP) == std::string::npos )
rc = FAIL_POWER_CONTROL ;
}
else if ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__POWER_ON )
{
if ( node_ptr->bmc_thread_info.data.find(IPMITOOL_POWER_ON_RESP) == std::string::npos )
rc = FAIL_POWER_CONTROL ;
}
else if ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__POWER_CYCLE )
{
if ( node_ptr->bmc_thread_info.data.find(IPMITOOL_POWER_CYCLE_RESP) == std::string::npos )
rc = FAIL_POWER_CONTROL ;
}
if ( rc )
{
node_ptr->bmc_thread_info.status = rc ;
node_ptr->bmc_thread_info.status_string = ("power command failed");
wlog ("%s %s Response: %s\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str(),
node_ptr->bmc_thread_info.data.c_str());
}
else
{
blog1 ("%s %s Response: %s\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str(),
node_ptr->bmc_thread_info.data.c_str());
}
}
}
#ifdef WANT_FIT_TESTING
if ( rc == PASS )
{
bool want_fit = false ;
int fit = FIT_CODE__BMC_COMMAND_RECV ;
if ( daemon_want_fit ( fit, node_ptr->hostname, "root_query" ) == true )
{
want_fit = true ;
}
if ( daemon_want_fit ( fit, node_ptr->hostname, "bmc_info" ) == true )
{
want_fit = true ;
}
else if ( daemon_want_fit ( fit, node_ptr->hostname, "reset_cause" ) == true )
{
want_fit = true ;
}
else if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__POWER_RESET ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "reset" ) == true ))
{
want_fit = true ;
}
else if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__POWER_ON ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "power_on" ) == true ))
{
want_fit = true ;
}
else if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__POWER_OFF ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "power_off" ) == true ))
{
want_fit = true ;
}
else if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__POWER_CYCLE ) &&
( daemon_want_fit ( fit, node_ptr->hostname, "power_cycle" ) == true ))
{
want_fit = true ;
}
if ( want_fit == true )
{
node_ptr->bmc_thread_info.status = rc = FAIL_FIT ;
node_ptr->bmc_thread_info.status_string = "bmc_command_recv fault insertion failure" ;
}
}
#endif
}
/* handle max retries reached */
if ( rc == PASS )
{
;
}
else if ( node_ptr->bmc_thread_ctrl.retries++ >= BMC__MAX_RECV_RETRIES )
{
wlog ("%s %s command timeout (%d of %d)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
node_ptr->bmc_thread_ctrl.retries,
BMC__MAX_RECV_RETRIES);
rc = FAIL_RETRY;
}
/* handle progressive retry */
else
{
if ( node_ptr->bmc_thread_ctrl.id == 0 )
{
/* don't log a warning for redfish query failures. */
if (( node_ptr->bmc_thread_info.command != BMC_THREAD_CMD__BMC_QUERY ) &&
( node_ptr->bmc_thread_info.command != BMC_THREAD_CMD__BMC_INFO ))
{
wlog ("%s %s command not-running\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str());
}
rc = FAIL_NOT_ACTIVE ;
}
else
{
/* The BMC is sometimes slow,
* No need to log till we reach lalf of the retry threshold */
if ( node_ptr->bmc_thread_ctrl.retries > (BMC__MAX_RECV_RETRIES/2) )
{
ilog ("%s %s command in-progress (polling %d of %d)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
node_ptr->bmc_thread_ctrl.retries,
BMC__MAX_RECV_RETRIES);
}
rc = RETRY ;
}
}
if ( rc != RETRY )
{
node_ptr->bmc_thread_ctrl.done = true ;
node_ptr->bmc_thread_ctrl.retries = 0 ;
node_ptr->bmc_thread_ctrl.id = 0 ;
node_ptr->bmc_thread_info.id = 0 ;
node_ptr->bmc_thread_info.command = 0 ;
}
return (rc);
}
/*****************************************************************************
*
* Name : bmc_command_done
*
* Description: This utility frees the bmc command thread for next execution.
*
*****************************************************************************/
void nodeLinkClass::bmc_command_done ( struct nodeLinkClass::node * node_ptr )
{
node_ptr->bmc_thread_ctrl.done = true ;
}