Merge "Add redfish power/reset/reinstall bmc support to maintenance"

This commit is contained in:
Zuul 2019-09-26 22:19:42 +00:00 committed by Gerrit Code Review
commit 901e81a191
27 changed files with 1264 additions and 332 deletions

View File

@ -14,10 +14,11 @@
using namespace std;
#include "nodeBase.h" /* for ... mtce-common node definitions */
#include "hostUtil.h" /* for ... mtce-common host definitions */
#include "bmcUtil.h" /* for ... mtce-common bmc utility header */
#include "jsonUtil.h" /* for ... json_tokener_parse */
#include "nodeBase.h" /* for ... mtce-common node definitions */
#include "hostUtil.h" /* for ... mtce-common host definitions */
#include "bmcUtil.h" /* for ... mtce-common bmc utility header */
#include "nodeUtil.h" /* for ... tolowercase */
#include "jsonUtil.h" /* for ... jsonUtil_get_key_value_string */
/**********************************************************************
*
@ -133,8 +134,10 @@ string bmcUtil_chop_system_req ( string request )
int bmcUtil_init ( void )
{
daemon_make_dir(BMC_OUTPUT_DIR) ;
daemon_make_dir(BMC_HWMON_TMP_DIR) ;
if ( daemon_is_file_present ( BMC_OUTPUT_DIR ) == false )
daemon_make_dir(BMC_OUTPUT_DIR) ;
if ( daemon_is_file_present ( BMC_HWMON_TMP_DIR ) == false )
daemon_make_dir(BMC_HWMON_TMP_DIR) ;
ipmiUtil_init ();
redfishUtil_init ();
@ -194,6 +197,11 @@ void bmcUtil_info_init ( bmc_info_type & bmc_info )
bmc_info.power_on = false ;
bmc_info.restart_cause.clear() ;
/* clear the supported actions lists */
bmc_info.reset_action_list.clear();
bmc_info.power_on_action_list.clear();
bmc_info.power_off_action_list.clear();
}
/*************************************************************************
@ -232,9 +240,9 @@ void bmcUtil_hwmon_info ( string hostname,
/* add the 'power' state key:val pair */
if ( power_on )
info_str.append("\",\"power\":\"on\"");
info_str.append("\",\"power_state\":\"on\"");
else
info_str.append("\",\"power\":\"off\"");
info_str.append("\",\"power_state\":\"off\"");
/* add the extra data if it exists */
if ( ! extra.empty () )
@ -287,7 +295,13 @@ bool bmcUtil_read_bmc_info( string hostname,
protocol = BMC_PROTOCOL__IPMITOOL ;
else
protocol = BMC_PROTOCOL__REDFISHTOOL ;
json_object_put(json_obj);
ilog ("%s power is %s with bmc communication using %s",
hostname.c_str(),
power_state.c_str(),
bmcUtil_getProtocol_str(protocol).c_str());
return (true);
}
else
@ -298,8 +312,9 @@ bool bmcUtil_read_bmc_info( string hostname,
blog ("%s failed to parse bmc info! set to ipmitool by default!\n", hostname.c_str());
return (false);
}
}
return (true);
}
/*****************************************************************************
*
* Name : bmcUtil_read_hwmond_protocol
@ -431,3 +446,140 @@ string bmcUtil_create_data_fn ( string & hostname,
return ( datafile );
}
/*************************************************************************
*
* Name : bmcUtil_is_power_on
*
* Purpose : Get power state from query response data.
*
* Description: Parse a BMC protocol specific response for current
* power state.
*
* Assumptions: supplied power state is not changed on failure.
*
* Parameters : hostname - string
* protocol - BMC_PROTOCOL__REDFISHTOOL | BMC_PROTOCOL__IPMITOOL
* response - protocol specific power query response data
*
* Updates : power_on - updated if response is queried ok
* set true if power is on
* set false if power is off
*
* Returns : PASS or
* FAIL_NO_DATA , FAIL_JSON_PARSE
*
*************************************************************************/
/* Parse a protocol specific power query response and report the power
 * state through 'power_on'.
 *
 * hostname - used for logging only
 * protocol - BMC_PROTOCOL__REDFISHTOOL selects json parsing of the response ;
 *            any other value is handled as an ipmitool text response
 * response - the raw power query response data
 * power_on - set true/false only when PASS is returned ;
 *            left untouched on every failure return
 *
 * Returns PASS, FAIL_NO_DATA (empty response) or
 *         FAIL_JSON_PARSE (redfish response could not be tokenized)
 */
int bmcUtil_is_power_on ( string            hostname,
                          bmc_protocol_enum protocol,
                          string          & response,
                          bool            & power_on)
{
    /* nothing to look at ; caller's power state stays as it was */
    if ( response.empty() )
    {
        wlog ("%s bmc power status query response empty",
                  hostname.c_str());
        return (FAIL_NO_DATA);
    }

    /* IPMI ; power is on if the 'on' status text is anywhere in the response */
    if ( protocol != BMC_PROTOCOL__REDFISHTOOL )
    {
        power_on = ( response.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos ) ;
        return (PASS);
    }

    /* Redfish ; the response is a json string carrying a power state label */
    struct json_object * resp_obj = json_tokener_parse(response.c_str());
    if ( resp_obj == NULL )
    {
        wlog ("%s failed to tokenize bmc info", hostname.c_str());
        return (FAIL_JSON_PARSE) ;
    }

    /* case insensitive compare ; anything other than "on" is treated as off */
    power_on = ( tolowercase(jsonUtil_get_key_value_string(resp_obj,REDFISH_LABEL__POWER_STATE)) == "on" ) ;

    /* free the json object */
    json_object_put(resp_obj);

    return (PASS);
}
/****************************************************************************
*
* Name : bmcUtil_remove_files
*
* Purpose : cleanup temp files.
*
* Description: Called during de-provision to remove temporary files created
* by host provisioning and command output.
*
* Function detects which process is calling it and removes
* only the temp files that daemon created for a specific host.
*
* Assumptions: Keeps the temp dirs clean and current.
*
****************************************************************************/
extern char *program_invocation_short_name;
/* Remove the temporary bmc files that the calling process created for
 * the specified host.
 *
 * hostname - files whose name contains this string are candidates
 * protocol - selects the protocol specific sub directory of BMC_OUTPUT_DIR
 *
 * Only acts when the caller is the mtcAgent or hwmond process, as
 * identified by program_invocation_short_name. Nothing is removed if
 * the directory listing cannot be loaded.
 */
void bmcUtil_remove_files ( string hostname, bmc_protocol_enum protocol )
{
    /* build the path to the protocol specific output directory */
    string output_dir = BMC_OUTPUT_DIR ;
    output_dir.append(bmcUtil_getProtocol_str(protocol));

    /* load the names of the files in that directory */
    std::list<string> filenames ;
    int rc = load_filenames_in_dir ( output_dir.data(), filenames ) ;
    if ( rc )
    {
        ilog ("%s failed to load files (rc:%d)", hostname.c_str(), rc );
        return ;
    }

    /* which process is this ? ; mtcAgent is checked first */
    const bool is_mtcagent =
        ( strcmp(MTC_SERVICE_MTCAGENT_NAME, program_invocation_short_name ) == 0 ) ;
    const bool is_hwmond = ( is_mtcagent == false ) &&
        ( strcmp(MTC_SERVICE_HWMOND_NAME, program_invocation_short_name ) == 0 ) ;

    /* any other process has nothing to clean up here */
    if (( is_mtcagent == false ) && ( is_hwmond == false ))
        return ;

    /* files exist as <process>_<hostname>_<suffix> ;
     * remove those that carry both this process name and this hostname */
    std::list<string>::iterator name_ptr ;
    for ( name_ptr  = filenames.begin() ;
          name_ptr != filenames.end() ;
          name_ptr++ )
    {
        if ( name_ptr->find (program_invocation_short_name) == string::npos )
            continue ;
        if ( name_ptr->find (hostname) == string::npos )
            continue ;

        daemon_remove_file ( name_ptr->data() );
        blog2 ("%s %s removed", hostname.c_str(), name_ptr->c_str());
    }

    if ( is_hwmond )
    {
        /* remove the static file that specified the protocol that was
         * used to create this host's sensor model */
        string proto_file = BMC_HWMON_TMP_DIR ;
        proto_file.append("/") ;
        proto_file.append(hostname);
        daemon_remove_file ( proto_file.data() );
    }
}

View File

@ -56,7 +56,9 @@ typedef struct
std::string sn ;
/* actions */
std::list<string> allowable_reset_action_list ;
std::list<string> reset_action_list ;
std::list<string> power_on_action_list ;
std::list<string> power_off_action_list ;
/* state info */
std::string restart_cause ;
@ -140,6 +142,15 @@ void bmcUtil_hwmon_info ( string hostname,
bool power_on,
string extra );
/* Get power state from query response data. */
int bmcUtil_is_power_on ( string hostname,
bmc_protocol_enum protocol,
string & response,
bool & power_on);
void bmcUtil_remove_files ( string hostname,
bmc_protocol_enum protocol );
#include "ipmiUtil.h" /* for ... mtce-common ipmi utility header */
#include "redfishUtil.h" /* for ... mtce-common redfish utility header */

View File

@ -53,6 +53,7 @@
#define MTC_CMD_FIT__LINKLIST ("/var/run/fit/linklist") /* hbsAgent */
#define MTC_CMD_FIT__HBSSILENT ("/var/run/fit/hbs_silent_fault") /* hbsAgent */
#define MTC_CMD_FIT__SENSOR_DATA ("/var/run/fit/sensor_data") /* hwmond */
#define MTC_CMD_FIT__INLINE_CREDS ("/var/run/fit/inline_creds") /* mtcAgent */
#define MTC_CMD_FIT__POWER_CMD ("/var/run/fit/power_cmd_result") /* mtcAgent */
#define MTC_CMD_FIT__ROOT_QUERY ("/var/run/fit/root_query") /* mtcAgent */
#define MTC_CMD_FIT__MC_INFO ("/var/run/fit/mc_info") /* mtcAgent */
@ -63,6 +64,9 @@
#define MTC_CMD_FIT__START_SVCS ("/var/run/fit/host_services") /* mtcClient */
#define MTC_CMD_FIT__NO_HS_ACK ("/var/run/fit/no_hs_ack") /* mtcClient */
#define MTC_CMD_FIT__GOENABLE_AUDIT ("/var/run/fit/goenable_audit") /* mtcAgent */
#define MTC_CMD_FIT__JSON_LEAK_SOAK ("/var/run/fit/json_leak_soak") /* mtcAgent */
#define MTC_CMD_FIT__BMC_ACC_FAIL ("/var/run/fit/bmc_access_fail")/* mtcAgent */
#define MTC_CMD_FIT__MEM_LEAK_DEBUG ("/var/run/fit/mem_leak_debug")/* mtcAgent */
/*****************************************************
* Fault Insertion Codes

View File

@ -239,6 +239,7 @@ extern char *program_invocation_short_name;
/** Scheduling Latency */
#define NSEC_TO_MSEC (1000000)
#define NSEC_TO_SEC (1000000000)
#define llog(format, args...) \
{ syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Latncy: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \

View File

@ -169,7 +169,12 @@ typedef enum
#define CLUSTER_HOST_SUFFIX ((const char*)("-cluster-host"))
#define NONE (const char *)"none"
#define NONE (const char *)"none"
#ifdef UNKNOWN
#undef UNKNOWN
#endif
#define UNKNOWN (const char *)"unknown"
/** Largest heartbeat pulse (req/resp) message size */
#define MAX_API_LOG_LEN (0x1000)

View File

@ -84,7 +84,7 @@
#define MTC_REINSTALL_TIMEOUT_MIN (MTC_MINS_1)
#define MTC_REINSTALL_TIMEOUT_MAX (MTC_HRS_4)
#define MTC_REINSTALL_WAIT_TIMER (10)
#define MTC_IPMITOOL_REQUEST_DELAY (10) /* consider making this shorter */
#define MTC_BMC_REQUEST_DELAY (10) /* consider making this shorter */
#define LAZY_REBOOT_RETRY_DELAY_SECS (60)
#define SM_NOTIFY_UNHEALTHY_DELAY_SECS (5)
#define MTC_MIN_ONLINE_PERIOD_SECS (7)

View File

@ -49,6 +49,7 @@ typedef struct
char msg[PING_MESSAGE_LEN];
} ping6_rx_message_type ;
/*******************************************************************************
*
* Name : pingUtil_init
@ -244,14 +245,16 @@ int pingUtil_send ( ping_info_type & ping_info )
wlog ("%s ping %s send failed (rc:%d) (%d:%m)\n", ping_info.hostname.c_str(), ping_info.ip.c_str(), bytes, errno );
return FAIL ;
}
if ( ping_info.monitoring == false )
if (( ping_info.monitoring == false ) &&
( ping_info.send_retries >= PING_MAX_SEND_RETRIES ))
{
ilog ("%s ping send %s ok ; identity:%04x sequence:%04x (try %d)\n",
ilog ("%s ping send %s ok ; identity:%04x sequence:%04x (try %d of %d)\n",
ping_info.hostname.c_str(),
ping_info.ip.c_str(),
ping_info.identity,
ping_info.sequence,
ping_info.send_retries);
ping_info.send_retries,
PING_MAX_SEND_RETRIES);
}
else
{

View File

@ -11,6 +11,7 @@
#include <stdio.h>
#include <iostream>
#include <string.h>
#include <json-c/json.h> /* for ... json-c json string parsing */
using namespace std;
@ -20,6 +21,12 @@ using namespace std;
#include "jsonUtil.h" /* for ... */
#include "redfishUtil.h" /* for ... this module header */
/* static prioritized list of redfish <named> actions.
* Higher priority action first. */
static std::list<string> reset_actions ;
static std::list<string> poweron_actions ;
static std::list<string> poweroff_actions ;
/*************************************************************************
*
* Name : redfishUtil_init
@ -35,9 +42,203 @@ using namespace std;
/* Module init ; create the redfishtool output directory and stock the
 * file scoped reset / power-on / power-off action lists in priority
 * order, highest priority first.
 *
 * Always returns PASS.
 */
int redfishUtil_init ( void )
{
    daemon_make_dir(REDFISHTOOL_OUTPUT_DIR) ;

    /* Start from empty lists so that a repeated init call restocks the
     * lists rather than accumulating duplicate entries in them. */
    reset_actions.clear();
    poweron_actions.clear();
    poweroff_actions.clear();

    /* Stock reset actions in order of priority */
    reset_actions.push_back (REDFISHTOOL_RESET__GRACEFUL_RESTART); /* P1 */
    reset_actions.push_back (REDFISHTOOL_RESET__FORCE_RESTART);    /* P2 */

    /* Stock power-on actions in order of priority */
    poweron_actions.push_back (REDFISHTOOL_POWER_ON__ON);          /* P1 */
    poweron_actions.push_back (REDFISHTOOL_POWER_ON__FORCE_ON);    /* P2 */

    /* Stock power-off actions in order of priority */
    poweroff_actions.push_back (REDFISHTOOL_POWER_OFF__GRACEFUL_SHUTDOWN); /* P1 */
    poweroff_actions.push_back (REDFISHTOOL_POWER_OFF__FORCE_OFF);         /* P2 */

    return (PASS);
}
/*************************************************************************
*
* Name : _load_action_lists
*
* Purpose : Load supported host actions.
*
* Description: Filter stock actions through host actions.
*
* Parameters : hostname - this host's name
* host_action_list - what actions this host reports support for.
*
* Updates: bmc_info - reference that includes host action lists
*
*************************************************************************/
/* true if 'action' matches one of the entries in 'stock_list' */
static bool _is_stock_action ( const std::list<string> & stock_list,
                               const string            & action )
{
    std::list<string>::const_iterator stock_ptr ;
    for ( stock_ptr  = stock_list.begin() ;
          stock_ptr != stock_list.end() ;
          stock_ptr++ )
    {
        if ( action == (*stock_ptr) )
            return (true);
    }
    return (false);
}

/* join the list entries into a single comma separated string */
static string _actions_to_csv ( const std::list<string> & action_list )
{
    string csv = "" ;
    std::list<string>::const_iterator action_ptr ;
    for ( action_ptr  = action_list.begin() ;
          action_ptr != action_list.end() ;
          action_ptr++ )
    {
        if ( !csv.empty() )
            csv.append(",");
        csv.append(*action_ptr);
    }
    return (csv);
}

/* Rebuild this host's supported reset, power-on and power-off action
 * lists in bmc_info from the actions the host reports.
 *
 * hostname         - used for logging only
 * bmc_info         - its three action lists are cleared and reloaded
 * host_action_list - the actions this host reports support for
 *
 * A reported action is added to a bmc_info list when it matches an entry
 * in the corresponding stock list. Entries are added in the order they
 * appear in host_action_list. The resulting lists are logged.
 */
void _load_action_lists ( string & hostname,
                          bmc_info_type & bmc_info,
                          std::list<string> & host_action_list)
{
    bmc_info.reset_action_list.clear();
    bmc_info.power_on_action_list.clear();
    bmc_info.power_off_action_list.clear();

    /* Filter each host reported action through the stock lists */
    std::list<string>::iterator host_ptr ;
    for ( host_ptr  = host_action_list.begin() ;
          host_ptr != host_action_list.end() ;
          host_ptr++ )
    {
        if ( _is_stock_action ( poweroff_actions, *host_ptr ) )
            bmc_info.power_off_action_list.push_back(*host_ptr) ;

        if ( _is_stock_action ( poweron_actions, *host_ptr ) )
            bmc_info.power_on_action_list.push_back(*host_ptr) ;

        if ( _is_stock_action ( reset_actions, *host_ptr ) )
            bmc_info.reset_action_list.push_back(*host_ptr) ;
    }

    /* log what was learned ; an empty list is reported as 'none' */
    string reset_csv    = _actions_to_csv ( bmc_info.reset_action_list );
    string poweron_csv  = _actions_to_csv ( bmc_info.power_on_action_list );
    string poweroff_csv = _actions_to_csv ( bmc_info.power_off_action_list );

    ilog ("%s bmc actions ; reset:%s power-on:%s power-off:%s",
              hostname.c_str(),
              reset_csv.empty()    ? "none" : reset_csv.c_str(),
              poweron_csv.empty()  ? "none" : poweron_csv.c_str(),
              poweroff_csv.empty() ? "none" : poweroff_csv.c_str());
}
#ifdef SAVE_IMP
/* Build the list of actions, for one action group, that are both stocked
 * by this module and reported as supported by the host.
 *
 * hostname         - used for logging only
 * action           - the action group ; reset, power-on or power-off
 * host_action_list - the actions this host reports support for
 * supp_action_list - cleared and then loaded with the matching actions
 *                    in stock list order
 *
 * Returns PASS,
 *         FAIL_BAD_CASE     for an unrecognized action group or
 *         FAIL_STRING_EMPTY when no action of that group is supported
 */
int _get_action_list ( string hostname,
                       redfish_action_enum action,
                       std::list<string> host_action_list,
                       std::list<string> & supp_action_list)
{
    int status = PASS ;
    std::list<string> * stock_list_ptr = NULL ;
    string group_name = "" ;

    supp_action_list.clear();

    /* select the stock list and log label for the requested group */
    if ( action == REDFISH_ACTION__RESET )
    {
        stock_list_ptr = &reset_actions ;
        group_name = "reset" ;
    }
    else if ( action == REDFISH_ACTION__POWER_ON )
    {
        stock_list_ptr = &poweron_actions ;
        group_name = "power-on" ;
    }
    else if ( action == REDFISH_ACTION__POWER_OFF )
    {
        stock_list_ptr = &poweroff_actions ;
        group_name = "power-off" ;
    }
    else
    {
        status = FAIL_BAD_CASE ;
    }

    /* Filter ; keep each stock action that the host also reports */
    if (( status == PASS ) && ( stock_list_ptr ))
    {
        std::list<string>::iterator stock_ptr ;
        for ( stock_ptr  = stock_list_ptr->begin() ;
              stock_ptr != stock_list_ptr->end() ;
              stock_ptr++ )
        {
            std::list<string>::iterator host_ptr ;
            for ( host_ptr  = host_action_list.begin() ;
                  host_ptr != host_action_list.end() ;
                  host_ptr++ )
            {
                if ( (*host_ptr) == (*stock_ptr) )
                {
                    supp_action_list.push_back(*stock_ptr) ;
                    break ;
                }
            }
        }
    }

    if ( !supp_action_list.empty() )
    {
        /* log the supported actions as a comma separated list */
        string joined = "" ;
        std::list<string>::iterator supp_ptr ;
        for ( supp_ptr  = supp_action_list.begin() ;
              supp_ptr != supp_action_list.end() ;
              supp_ptr++ )
        {
            if ( !joined.empty() )
                joined.append(", ");
            joined.append(*supp_ptr);
        }
        ilog ("%s redfish %s actions: %s",
                  hostname.c_str(),
                  group_name.c_str(),
                  joined.c_str());
    }
    else
    {
        elog ("%s has no %s actions", hostname.c_str(), group_name.c_str());

        /* don't mask an earlier failure status */
        if ( status == PASS )
            status = FAIL_STRING_EMPTY ;
    }
    return (status);
}
#endif
/*************************************************************************
*
* Name : redfishUtil_is_supported
@ -95,22 +296,33 @@ bool redfishUtil_is_supported (string & hostname, string & response)
&major,
&minor,
&revision );
if (( fields ) && ( major >= REDFISH_MIN_MAJOR_VERSION ))
if ( fields )
{
ilog ("%s bmc redfish version %s (%d.%d.%d)",
hostname.c_str(),
redfish_version.c_str(),
major, minor, revision );
return true ;
if (( major >= REDFISH_MIN_MAJOR_VERSION ) && ( minor >= REDFISH_MIN_MINOR_VERSION ))
{
ilog ("%s bmc supports redfish version %s",
hostname.c_str(),
redfish_version.c_str());
return true ;
}
else
{
ilog ("%s bmc redfish version '%s' is below minimum baseline %d.%d.x (%d:%d.%d.%d)",
hostname.c_str(),
redfish_version.c_str(),
REDFISH_MIN_MAJOR_VERSION,
REDFISH_MIN_MINOR_VERSION,
fields, major, minor, revision);
}
}
else
{
ilog ("%s bmc has unsupported redfish version %s (%d:%d.%d.%d)",
wlog ("%s failed to parse redfish version %s",
hostname.c_str(),
redfish_version.c_str(),
fields, major, minor, revision );
blog ("%s response: %s", hostname.c_str(), response.c_str());
redfish_version.c_str());
blog ("%s response: %s",
hostname.c_str(),
response.c_str());
}
}
else
@ -165,13 +377,39 @@ string redfishUtil_create_request ( string cmd,
/* allow the BMC to redirect http to https */
command_request.append(" -S Always");
/* redfishtool default timeout is 10 seconds.
* Seeing requests that are taking a little longer than that.
* defaulting to 30 sec timeout */
command_request.append(" -T 30");
/* specify the bmc ip address */
command_request.append(" -r ");
command_request.append(ip);
/* add the config file option and config filename */
command_request.append(" -c ");
command_request.append(config_file);
#ifdef WANT_INLINE_CREDS
if ( daemon_is_file_present ( MTC_CMD_FIT__INLINE_CREDS ) )
{
string cfg_str = daemon_read_file (config_file.data());
struct json_object *_obj = json_tokener_parse( cfg_str.data() );
if ( _obj )
{
command_request.append(" -u ");
command_request.append(jsonUtil_get_key_value_string(_obj,"username"));
command_request.append(" -p ");
command_request.append(jsonUtil_get_key_value_string(_obj,"password"));
}
else
{
slog("FIT: failed to get creds from config file");
}
}
else
#endif
{
/* add the config file option and config filename */
command_request.append(" -c ");
command_request.append(config_file);
}
/* add the command */
command_request.append(" ");
@ -189,11 +427,66 @@ string redfishUtil_create_request ( string cmd,
/*************************************************************************
*
* Name : redfishUtil_get_bmc_info
* Name : redfishUtil_health_info
*
* Purpose :
* Purpose : Parse the supplied object.
*
* Description:
* Description: Update callers health state, health and health_rollup
* variables with what is contained in the supplied object.
*
* "Status": {
* "HealthRollup": "OK",
* "State": "Enabled",
* "Health": "OK"
* },
*
* Assumptions: Status label must be a first order label.
* This utility does not walk the object looking for status.
*
* Returns : PASS if successful
* FAIL_OPERATION if unsuccessful
*
************************************************************************/
/* Load the caller's status struct from the 'Status' object found at the
 * first level of the supplied json object.
 *
 * hostname - used for logging only
 * entity   - name of the entity being queried ; used for logging only
 * info_obj - json object expected to contain the Status label ; may be NULL
 * status   - state, health and health_rollup are loaded from the Status
 *            object, or all set to UNKNOWN on failure
 *
 * Returns PASS, or FAIL_OPERATION if info_obj is NULL or has no Status object
 */
int redfishUtil_health_info ( string                & hostname,
                              string                  entity,
                              struct json_object    * info_obj,
                              redfish_entity_status & status )
{
    struct json_object * status_obj = (struct json_object *)(NULL);
    bool status_found = false ;

    /* only a first level lookup of the Status label is done */
    if ( info_obj )
    {
        json_bool json_rc = json_object_object_get_ex( info_obj,
                                                       REDFISH_LABEL__STATUS,
                                                       &status_obj );
        if (( json_rc == TRUE ) && ( status_obj ))
            status_found = true ;
    }

    if ( status_found == false )
    {
        wlog ("%s unable to get %s state and health info",
                  hostname.c_str(), entity.c_str());

        status.state         = UNKNOWN ;
        status.health        = UNKNOWN ;
        status.health_rollup = UNKNOWN ;
        return (FAIL_OPERATION);
    }

    status.state         = jsonUtil_get_key_value_string( status_obj,
                                                          REDFISH_LABEL__STATE );
    status.health        = jsonUtil_get_key_value_string( status_obj,
                                                          REDFISH_LABEL__HEALTH );
    status.health_rollup = jsonUtil_get_key_value_string( status_obj,
                                                          REDFISH_LABEL__HEALTHROLLUP );
    return (PASS);
}
/*************************************************************************
*
* Name : redfishUtil_get_bmc_info
*
* Purpose : Parse the Systems get output
*
* Description: Log all important BMC server info such as processors, memory,
* model number, firmware version, hardware part number, etc.
*
* Returns : PASS if successful
* FAIL_OPERATION if unsuccessful
@ -204,6 +497,11 @@ int redfishUtil_get_bmc_info ( string & hostname,
string & bmc_info_filename,
bmc_info_type & bmc_info )
{
#ifdef WANT_FIT_TESTING
if ( daemon_is_file_present ( MTC_CMD_FIT__MEM_LEAK_DEBUG ))
return (PASS) ;
#endif
if ( bmc_info_filename.empty() )
{
wlog ("%s bmc info filename empty", hostname.c_str());
@ -225,28 +523,6 @@ int redfishUtil_get_bmc_info ( string & hostname,
}
bmc_info.manufacturer = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MANUFACTURER );
bmc_info.sn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__SERIAL_NUMBER);
bmc_info.mn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MODEL_NUMBER );
bmc_info.pn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__PART_NUMBER );
bmc_info.bmc_ver = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__BMC_VERSION );
bmc_info.bios_ver = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__BIOS_VERSION );
ilog ("%s manufacturer is %s", hostname.c_str(), bmc_info.manufacturer.c_str());
ilog ("%s model number:%s part number:%s serial number:%s",
hostname.c_str(),
bmc_info.mn.c_str(),
bmc_info.pn.c_str(),
bmc_info.sn.c_str());
ilog ("%s BIOS firmware version is %s",
hostname.c_str(),
bmc_info.bios_ver != NONE ? bmc_info.bios_ver.c_str() : "unavailable" );
ilog ("%s BMC firmware version is %s",
hostname.c_str(),
bmc_info.bmc_ver != NONE ? bmc_info.bmc_ver.c_str() : "unavailable" );
/* load the power state */
string power_state = tolowercase(jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__POWER_STATE));
if ( power_state == "on" )
@ -255,50 +531,104 @@ int redfishUtil_get_bmc_info ( string & hostname,
bmc_info.power_on = false ;
ilog ("%s power is %s", hostname.c_str(), power_state.c_str());
bmc_info.manufacturer = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MANUFACTURER );
bmc_info.sn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__SERIAL_NUMBER);
bmc_info.mn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MODEL_NUMBER );
bmc_info.pn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__PART_NUMBER );
ilog ("%s manufacturer is %s ; model:%s part:%s serial:%s ",
hostname.c_str(),
bmc_info.manufacturer.c_str(),
bmc_info.mn.c_str(),
bmc_info.pn.c_str(),
bmc_info.sn.c_str());
/* get number of processors */
string processors = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__PROCESSOR );
if ( ! processors.empty() )
bmc_info.bios_ver = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__BIOS_VERSION );
if (( !bmc_info.bios_ver.empty() ) && ( bmc_info.bios_ver != NONE ))
{
struct json_object *proc_obj = json_tokener_parse((char*)processors.data());
if ( proc_obj )
ilog ("%s BIOS fw version %s",
hostname.c_str(),
bmc_info.bios_ver.c_str());
}
bmc_info.bmc_ver = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__BMC_VERSION );
if (( !bmc_info.bmc_ver.empty() ) && ( bmc_info.bmc_ver != NONE ))
{
ilog ("%s BMC fw version %s",
hostname.c_str(),
bmc_info.bmc_ver.c_str());
}
struct json_object *json_obj_actions;
if ( json_object_object_get_ex(json_obj, REDFISH_LABEL__ACTIONS, &json_obj_actions ))
{
std::list<string> action_list ;
/* get the first level reset action label content */
string json_actions =
jsonUtil_get_key_value_string (json_obj_actions,
REDFISHTOOL_RESET_ACTIONS_LABEL);
if ( jsonUtil_get_list ((char*)json_actions.data(), REDFISHTOOL_RESET_ACTIONS_ALLOWED_LABEL, action_list ) == PASS )
{
bmc_info.processors = jsonUtil_get_key_value_int ( proc_obj, REDFISH_LABEL__COUNT );
ilog ("%s has %d processors", hostname.c_str(), bmc_info.processors);
json_object_put(proc_obj );
_load_action_lists ( hostname, bmc_info, action_list);
}
else
{
slog ("%s processor obj: %s", hostname.c_str(), processors.c_str());
elog ("%s actions list get failed ; [%s]", hostname.c_str(), json_actions.c_str());
}
}
else
{
slog ("%s processor count unavailable", hostname.c_str());
elog ("%s action object get failed", hostname.c_str());
}
/* get number of processors */
struct json_object *proc_obj = (struct json_object *)(NULL);
json_bool json_rc = json_object_object_get_ex( json_obj,
REDFISH_LABEL__PROCESSOR,
&proc_obj );
if (( json_rc == TRUE ) && ( proc_obj ))
{
redfish_entity_status status ;
bmc_info.processors = jsonUtil_get_key_value_int ( proc_obj, REDFISH_LABEL__COUNT );
redfishUtil_health_info ( hostname, REDFISH_LABEL__PROCESSOR,
proc_obj, status) ;
ilog ("%s has %2d Processors ; %s and %s:%s",
hostname.c_str(),
bmc_info.processors,
status.state.c_str(),
status.health.c_str(),
status.health_rollup.c_str());
}
else
{
wlog ("%s processor object not found", hostname.c_str());
}
/* get amount of memory */
string memory = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MEMORY );
if ( ! memory.empty() )
struct json_object *mem_obj = (struct json_object *)(NULL);
json_rc = json_object_object_get_ex( json_obj,
REDFISH_LABEL__MEMORY,
&mem_obj );
if (( json_rc == TRUE ) && ( mem_obj ))
{
struct json_object *mem_obj = json_tokener_parse((char*)memory.data());
if ( mem_obj )
{
bmc_info.memory_in_gigs = jsonUtil_get_key_value_int ( mem_obj, REDFISH_LABEL__MEMORY_TOTAL );
ilog ("%s has %d gigs of memory", hostname.c_str(), bmc_info.memory_in_gigs );
json_object_put(mem_obj );
}
else
{
slog ("%s memory obj: %s", hostname.c_str(), memory.c_str() );
}
redfish_entity_status status ;
bmc_info.memory_in_gigs = jsonUtil_get_key_value_int ( mem_obj, REDFISH_LABEL__MEMORY_TOTAL );
redfishUtil_health_info ( hostname, REDFISH_LABEL__MEMORY,
mem_obj, status) ;
ilog ("%s has %d GiB Memory ; %s and %s:%s",
hostname.c_str(),
bmc_info.memory_in_gigs,
status.state.c_str(),
status.health.c_str(),
status.health_rollup.c_str() );
}
else
{
slog ("%s memory size unavailable", hostname.c_str());
wlog ("%s memory object not found", hostname.c_str());
}
json_object_put(json_obj );
return PASS ;
return (PASS) ;
}

View File

@ -21,12 +21,38 @@
/* generic labels */
#define REDFISH_LABEL__STATUS ((const char *)("Status"))
#define REDFISH_LABEL__STATE ((const char *)("State"))
#define REDFISH_LABEL__HEALTH ((const char *)("Health"))
#define REDFISH_LABEL__COUNT ((const char *)("Count"))
#define REDFISH_LABEL__MODEL ((const char *)("Model"))
#define REDFISH_LABEL__HEALTH ((const char *)("Health"))
#define REDFISH_LABEL__HEALTHROLLUP ((const char *)("HealthRollup"))
/* redfish version */
/* State and health strings of a redfish entity, as read from the
 * entity's "Status" json object by redfishUtil_health_info. */
typedef struct
{
/* Enabled indicates the resource is available.
* Disabled indicates the resource has been intentionally made unavailable
* but it can be enabled.
* Offline indicates the resource is unavailable intentionally and requires
* action to be made available.
* InTest indicates that the component is undergoing testing.
* Starting indicates that the resource is on its way to becoming available.
* Absent indicates the resource is physically unavailable */
string state ;
/* Health State of the resource without dependents ; "Health" label */
string health ;
/* Health State of the resource and dependents ; "HealthRollup" label */
string health_rollup ;
} redfish_entity_status ;
/* Redfish version format is #.#.# or major.minor.revision
* This feature does not care about revision.
* The following are the minimum version numbers for major and minor
* for maintenance to accept it as a selectable option */
#define REDFISH_MIN_MAJOR_VERSION (1)
#define REDFISH_MIN_MINOR_VERSION (0)
#define REDFISH_LABEL__REDFISH_VERSION ((const char *)("RedfishVersion"))
/* bmc info labels */
@ -45,17 +71,69 @@
/* server processor info label */
#define REDFISH_LABEL__PROCESSOR ((const char *)("ProcessorSummary"))
/* maintenance administrative action commands */
#define REDFISHTOOL_ROOT_QUERY_CMD ((const char *)("root"))
#define REDFISHTOOL_BMC_INFO_CMD ((const char *)("Systems get"))
/* supported actions */
#define REDFISH_LABEL__ACTIONS ((const char *)("Actions"))
#define REDFISH_LABEL__ACTION_RESET ((const char *)("#ComputerSystem.Reset"))
#define REDFISH_LABEL__ACTION_RESET_ALLOWED ((const char *)("ResetType@Redfish.AllowableValues"))
/* maintenance administrative action commands */
#define REDFISHTOOL_ROOT_QUERY_CMD ((const char *)("root"))
#define REDFISHTOOL_BMC_INFO_CMD ((const char *)("Systems get"))
#define REDFISHTOOL_POWER_RESET_CMD ((const char *)("Systems reset GracefulRestart"))
#define REDFISHTOOL_POWER_ON_CMD ((const char *)("Systems reset On"))
#define REDFISHTOOL_POWER_OFF_CMD ((const char *)("Systems reset ForceOff"))
/* Redfish Reset Types:
*
* https://www.dmtf.org/sites/default/files/standards/documents/DSP0268_2019.1a.pdf */
#define REDFISHTOOL_POWER_RESET_CMD ((const char *)("Systems reset "))
/* Action groups ; each group has its own prioritized list of
 * Redfish reset type sub-commands in redfishUtil.cpp */
typedef enum
{
REDFISH_ACTION__RESET, /* GracefulRestart, ForceRestart */
REDFISH_ACTION__POWER_ON, /* On, ForceOn */
REDFISH_ACTION__POWER_OFF, /* GracefulShutdown, ForceOff */
} redfish_action_enum ;
/* Reset actions allows json block
"Actions": {
"#ComputerSystem.Reset": {
"ResetType@Redfish.AllowableValues": [
"On",
"ForceOff",
"GracefulRestart",
"PushPowerButton",
"Nmi"
],
"target": "/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset"
}
},
*/
#define REDFISHTOOL_RESET_ACTIONS_LABEL ((const char *)("#ComputerSystem.Reset")) /* level 1 label */
#define REDFISHTOOL_RESET_ACTIONS_ALLOWED_LABEL ((const char *)("ResetType@Redfish.AllowableValues")) /* level 2 label */
/* Reset sub-commands */
#define REDFISHTOOL_RESET__GRACEFUL_RESTART ((const char *)("GracefulRestart")) /* Perform a graceful shutdown followed by a restart of the system. */
#define REDFISHTOOL_RESET__FORCE_RESTART ((const char *)("ForceRestart")) /* Perform an immediate (non-graceful) shutdown, followed by a restart */
/* Power off sub-commands */
#define REDFISHTOOL_POWER_OFF__GRACEFUL_SHUTDOWN ((const char *)("GracefulShutdown")) /* Perform a graceful shutdown and power off. */
#define REDFISHTOOL_POWER_OFF__FORCE_OFF ((const char *)("ForceOff")) /* Perform a Non-Graceful immediate power off */
/* Power On sub-commands */
#define REDFISHTOOL_POWER_ON__ON ((const char *)("On")) /* Turn the unit on. */
#define REDFISHTOOL_POWER_ON__FORCE_ON ((const char *)("ForceOn")) /* Turn the unit on immediately. */
/* Power Cycle sub-commands */
#define REDFISHTOOL_POWER_CYCLE__POWER_CYCLE ((const char *)("PowerCycle")) /* Perform a power cycle of the unit. */
/* Diagnostic sub-commands */
#define REDFISHTOOL_DIAG__NMI ((const char *)("Nmi")) /* Generate a Diagnostic Interrupt to halt the system. */
#define REDFISHTOOL_RESET__PUSH_BUTTON ((const char *)("PushPowerButton")) /* Simulate the pressing of the physical power button on this unit */
#define REDFISHTOOL_BOOTDEV_PXE_CMD ((const char *)("Systems setBootOverride Once Pxe"))
@ -85,4 +163,6 @@ int redfishUtil_get_bmc_info ( string & hostname,
string & response,
bmc_info_type & bmc_info );
string redfishUtil_get_cmd_option ( redfish_action_enum action,
std::list<string> host_action_list );
#endif // __INCLUDE_REDFISHUTIL_H__

View File

@ -25,10 +25,10 @@
*
****************************************************************************/
#include "daemon_common.h" /* for ... daemon_health_test */
#include "nodeBase.h" /* for ... mtce node common definitions */
#include "hostUtil.h" /* for ... mtce host common definitions */
#include "threadUtil.h" /* for ... this module header */
#include "daemon_common.h" /* for ... daemon_health_test */
#include "nodeBase.h" /* for ... mtce node common definitions */
#include "hostUtil.h" /* for ... mtce host common definitions */
#include "threadUtil.h" /* for ... this module header */
/* Stores the parent process's timer handler */
static void (*thread_timer_handler)(int, siginfo_t*, void*) = NULL ;

View File

@ -584,6 +584,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
mtcTimer_init ( ptr->bm_timer, hostname, "bm timer" ); /* Init node's bm timer */
mtcTimer_init ( ptr->bm_ping_info.timer,hostname,"ping timer" ); /* Init node's ping timer */
mtcTimer_init ( ptr->bmc_access_timer, hostname, "bmc acc timer" ); /* Init node's bm access timer */
mtcTimer_init ( ptr->bmc_audit_timer, hostname, "bmc aud timer" ); /* Init node's bm audit timer */
mtcTimer_init ( ptr->host_services_timer, hostname, "host services timer" ); /* host services timer */
mtcTimer_init ( ptr->hwmon_powercycle.control_timer, hostname, "powercycle control timer");
@ -633,11 +634,22 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
/* initialize all board management variables for this host */
ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
ptr->bm_ip = NONE ;
ptr->bm_type = NONE ;
ptr->bm_un = NONE ;
ptr->bm_pw = NONE ;
ptr->bm_cmd= NONE ;
ptr->bm_type = NONE ; /* TODO: OBS */
ptr->bmc_provisioned = false ; /* assume not provisioned until learned */
/* restart commands that need to be learned for Redfish.
* ipmi commands are hard coded for legacy support.
*/
ptr->bm_reset_cmd = NONE ;
ptr->bm_restart_cmd = NONE ;
ptr->bm_poweron_cmd = NONE ;
ptr->bm_poweroff_cmd = NONE ;
ptr->bmc_provisioned = false ; /* assume not provisioned until learned */
ptr->bmc_accessible = false ; /* assume not accessible until proven */
ptr->bmc_access_method_changed = false ;
if ( hostname == my_hostname )
ptr->power_on = true ;
@ -908,6 +920,7 @@ int nodeLinkClass::remNode( string hostname )
mtcTimer_fini ( ptr->bm_timer );
mtcTimer_fini ( ptr->bmc_access_timer );
mtcTimer_fini ( ptr->bmc_audit_timer );
mtcTimer_fini ( ptr->bm_ping_info.timer );
#ifdef WANT_PULSE_LIST_SEARCH_ON_DELETE
@ -2745,7 +2758,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip = inv.bm_ip ;
node_ptr->thread_extra_info.bm_un = node_ptr->bm_un = inv.bm_un ;
node_ptr->thread_extra_info.bm_type= node_ptr->bm_type = inv.bm_type ;
node_ptr->bm_type = inv.bm_type ;
node_ptr->bm_ping_info.sock = 0 ;
@ -4068,9 +4081,35 @@ void nodeLinkClass::bmc_access_data_init ( struct nodeLinkClass::node * node_ptr
node_ptr->reset_cause_query_done = false ;
node_ptr->power_status_query_active = false ;
node_ptr->power_status_query_done = false ;
node_ptr->bmc_protocol_learned = false ;
node_ptr->bmc_protocol_learning = false ;
node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
/* remove all the bmc related temporary files created
* for this host and process */
bmcUtil_remove_files ( node_ptr->hostname, node_ptr->bmc_protocol );
if ( this->bmc_access_method == "ipmi" )
{
blog2 ("%s BMC access method set to 'ipmi'",
node_ptr->hostname.c_str());
node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
node_ptr->bmc_protocol_learning = false ;
node_ptr->bmc_protocol_learned = true ;
}
else if ( this->bmc_access_method == "redfish" )
{
blog2 ("%s BMC access method set to 'redfish'",
node_ptr->hostname.c_str());
node_ptr->bmc_protocol = BMC_PROTOCOL__REDFISHTOOL ;
node_ptr->bmc_protocol_learning = false ;
node_ptr->bmc_protocol_learned = true ;
}
else
{
blog2 ("%s BMC access method will be learned",
node_ptr->hostname.c_str());
node_ptr->bmc_protocol_learned = false ;
node_ptr->bmc_protocol_learning = false ;
node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
}
bmcUtil_info_init ( node_ptr->bmc_info );
}
}
@ -4101,10 +4140,6 @@ int nodeLinkClass::set_bm_prov ( struct nodeLinkClass::node * node_ptr, bool sta
/* Clear the alarm if we are starting fresh from an unprovisioned state */
if (( node_ptr->bmc_provisioned == false ) && ( state == true ))
{
bmcUtil_hwmon_info ( node_ptr->hostname,
node_ptr->bmc_protocol,
node_ptr->power_on, "" );
ilog ("%s starting BM ping monitor to address '%s'\n",
node_ptr->hostname.c_str(),
node_ptr->bm_ip.c_str());
@ -4151,7 +4186,7 @@ int nodeLinkClass::set_bm_prov ( struct nodeLinkClass::node * node_ptr, bool sta
pingUtil_fini ( node_ptr->bm_ping_info );
bmc_access_data_init ( node_ptr );
mtcTimer_reset ( node_ptr->bmc_audit_timer );
if ( !thread_idle( node_ptr->bmc_thread_ctrl ) )
{
thread_kill ( node_ptr->bmc_thread_ctrl , node_ptr->bmc_thread_info);
@ -6889,6 +6924,24 @@ struct nodeLinkClass::node * nodeLinkClass::get_bmc_access_timer ( timer_t tid )
}
/**
 * Look up the node that owns the specified bmc audit timer.
 *
 * @param tid the timer id to search the node list for
 * @return pointer to the node whose bmc_audit_timer.tid matches tid,
 *         or NULL if tid is NULL, the list is empty or no node matches.
 */
struct nodeLinkClass::node * nodeLinkClass::get_bmc_audit_timer ( timer_t tid )
{
    /* nothing to search for with a null timer id ; the loop condition
     * below also covers the empty list (head == NULL) case */
    if ( tid != NULL )
    {
        for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next )
        {
            if ( ptr->bmc_audit_timer.tid == tid )
            {
                return ptr ;
            }
            /* stop at the tail without following its next pointer */
            if ( ptr == tail )
                break ;
        }
    }
    return static_cast<struct node *>(NULL);
}
struct nodeLinkClass::node * nodeLinkClass::get_mtcConfig_timer ( timer_t tid )
{
@ -8617,14 +8670,20 @@ void nodeLinkClass::mem_log_general_mtce_hosts ( void )
void nodeLinkClass::mem_log_bm ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tBMC %s %s:%s prov:%s learn:%s:%s\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tBMC %s %s:%s prov:%s acc:%s ping:%s learn:%s:%s Query:%s:%s Timer:%s:%s\n",
node_ptr->hostname.c_str(),
bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(),
node_ptr->bm_un.c_str(),
node_ptr->bm_ip.c_str(),
node_ptr->bmc_provisioned ? "Yes" : "No",
node_ptr->bmc_protocol_learned ? "Yes" : "No",
node_ptr->bmc_protocol_learning ? "Yes" : "No");
node_ptr->bmc_provisioned ? "Y" : "N",
node_ptr->bmc_accessible ? "Y" : "N",
node_ptr->bm_ping_info.ok ? "Y" : "N",
node_ptr->bmc_protocol_learned ? "Y" : "N",
node_ptr->bmc_protocol_learning ? "Y" : "N",
node_ptr->bmc_info_query_active ? "Y" : "N",
node_ptr->bmc_info_query_done ? "Y" : "N",
node_ptr->bm_timer.active ? "Y" : "N",
node_ptr->bmc_access_timer.active ? "Y" : "N" );
mem_log (str);
}
@ -8729,14 +8788,16 @@ void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr )
void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d Power:%d Cycle:%d\n",
node_ptr->hostname.c_str(),
node_ptr->addStage,
node_ptr->offlineStage,
node_ptr->swactStage,
node_ptr->recoveryStage,
node_ptr->enableStage,
node_ptr->disableStage);
node_ptr->disableStage,
node_ptr->powerStage,
node_ptr->powercycleStage);
mem_log (str);
}

View File

@ -583,18 +583,21 @@ private:
/** A string label that represents the board management
* controller type for this host */
string bm_type ;
string bm_type ; /* TODO: OBS */
/** The operator provisioned board management hostname */
string bm_un ;
/* Indicates there is a board management test
* for this host in progress */
bool bm_test_in_progress ;
/** the command to use in the bmc thread.
* introduced for redfish reset sub command ; reset type */
string bm_cmd;
/* Indicates there is a board management operation
* in progress on this host */
bool bm_oper_in_progress ;
/* restart commands that need to be learned for Redfish.
* ipmi commands are hard coded for legacy support. */
string bm_reset_cmd ;
string bm_restart_cmd ;
string bm_poweron_cmd ;
string bm_poweroff_cmd ;
/**
* The BMC is 'accessible' once provisioning data is available
@ -602,6 +605,10 @@ private:
**/
bool bmc_accessible;
/* tell the host level bmc_handler that this hosts access
* method has changed */
bool bmc_access_method_changed ;
/** @} private_boad_management_variables */
/**
@ -694,6 +701,9 @@ private:
/* timer used to manage the bmc access alarm */
struct mtc_timer bmc_access_timer ;
/* timer used to audit bmc info */
struct mtc_timer bmc_audit_timer ;
/*****************************************************
* Maintenance Thread Structs
*****************************************************/
@ -1092,6 +1102,7 @@ private:
struct nodeLinkClass::node * get_ping_timer ( timer_t tid );
struct nodeLinkClass::node * get_bm_timer ( timer_t tid );
struct nodeLinkClass::node * get_bmc_access_timer ( timer_t tid );
struct nodeLinkClass::node * get_bmc_audit_timer ( timer_t tid );
struct nodeLinkClass::node * get_host_services_timer ( timer_t tid );
struct nodeLinkClass::node * get_powercycle_control_timer ( timer_t tid );
@ -1363,6 +1374,8 @@ public:
/* the main fsm entrypoint to service all hosts */
void fsm ( void ) ;
void bmc_access_method_change_notifier ( void );
/** This controller's hostname set'er */
void set_my_hostname ( string hostname );
@ -1486,6 +1499,13 @@ public:
std::list<string> mnfa_awol_list ;
void mnfa_timeout_handler ( void );
/* How to communicate with the BMCs in this lab.
* Options are: ipmi, redfish, learn */
string bmc_access_method ;
/* handle bmc access method change by service parameter */
bool bmc_access_method_changed ;
/** Return the number of inventoried hosts */
int num_hosts ( void );
@ -1676,9 +1696,6 @@ public:
/** Returns number of enabled controllers */
int num_controllers_enabled ( void );
/** Run the FSM against the specified host */
int run_fsm ( string hostname );
/** Post a specific enable handler stage */
int set_enableStage ( string & hostname, mtc_enableStages_enum stage );

View File

@ -340,6 +340,11 @@ void bmcUtil_info_init ( bmc_info_type & bmc_info )
UNUSED(bmc_info);
}
/* Stub implementation: accepts the hostname and protocol but uses
 * neither ; both are only marked as unused and nothing else is done. */
void bmcUtil_remove_files ( string hostname, bmc_protocol_enum protocol )
{
UNUSED(hostname);
UNUSED(protocol);
}
int nodeLinkClass::bmc_command_send ( struct nodeLinkClass::node * node_ptr, int command )
{
UNUSED(node_ptr);

View File

@ -52,7 +52,6 @@ using namespace std;
#define MAX_HOST_SENSORS (512) // (100)
#define MAX_HOST_GROUPS (20)
#define MIN_SENSOR_GROUPS (4)
#define HWMON_MAX_BMC_DATA_BUF_SIZE (4096*8) // Thermal sensor data need 20KiB at least
#define HWMON_DEFAULT_LARGE_INTERVAL (MTC_MINS_15)
#define HWMON_DEFAULT_AUDIT_INTERVAL (MTC_MINS_2)
#define HWMON_MIN_AUDIT_INTERVAL (10)
@ -60,6 +59,10 @@ using namespace std;
#define MAX_SENSORS_NOT_FOUND (5)
#define START_DEBOUCE_COUNT (1)
// Power sensor data for Dell R740-emc-1 needs 45KiB
// Thermal sensor readout on wolfpass requires 20KiB
#define HWMON_MAX_BMC_DATA_BUF_SIZE (102400)
/* Daemon Sensor Config Directory - where profile files are stored */
#define CONFIG_DIR ((const char *)("/etc/hwmon.d"))

View File

@ -91,6 +91,10 @@ void hwmonHostClass::bmc_data_init ( struct hwmonHostClass::hwmon_host * host_pt
host_ptr->addStage = HWMON_ADD__START;
host_ptr->sensor_query_count = 0 ;
/* remove all the bmc related temporary files created
* for this host and process */
bmcUtil_remove_files ( host_ptr->hostname, host_ptr->protocol );
}
/*
@ -611,14 +615,13 @@ int hwmonHostClass::mod_host ( node_inv_type & inv )
{
/* if we have a credentials only change then disable the sensor
* model only to get re-enabled if sensor monitoring is
* successful with the new credentils */
* successful with the new credentials */
if (( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) &&
( host_ptr->bm_un.compare(NONE)))
{
bmc_set_group_state ( host_ptr, "disabled" );
bmc_disable_sensors ( host_ptr );
}
rc = set_bm_prov ( host_ptr, false );
}
if (( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) &&
@ -713,6 +716,8 @@ int hwmonHostClass::add_host ( node_inv_type & inv )
host_ptr->accounting_ok = false ;
host_ptr->accounting_bad_count = 0 ;
host_ptr->general_log_throttle = 0 ;
/* Additions for sensor monitoring using IPMI protocol */
host_ptr->want_degrade_audit = false ;
host_ptr->degrade_audit_log_throttle = 0 ;
@ -758,9 +763,7 @@ int hwmonHostClass::add_host ( node_inv_type & inv )
host_ptr->group_index = 0 ;
/* Set default BMC protocol */
host_ptr->protocol = BMC_PROTOCOL__IPMITOOL ;
host_ptr->bmc_thread_info.proto = BMC_PROTOCOL__IPMITOOL ;
bmcUtil_write_hwmond_protocol ( host_ptr->hostname, BMC_PROTOCOL__IPMITOOL ) ;
host_ptr->protocol = bmcUtil_read_hwmond_protocol(host_ptr->hostname) ;
/* Init sensor model relearn controls, state and status */
host_ptr->relearn = false ;

View File

@ -253,6 +253,7 @@ int daemon_init ( string iface, string nodetype )
hwmon_hdlr_init ( &hwmon_ctrl );
hwmon_stages_init ();
httpUtil_init ();
bmcUtil_init();
/* init the control struct */
hwmon_ctrl.my_hostname = "" ;

View File

@ -58,7 +58,9 @@
int hwmonHostClass::bmc_create_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr )
{
int rc = PASS ;
ilog ("%s creating sensor model\n", host_ptr->hostname.c_str());
ilog ("%s creating sensor model using %s\n",
host_ptr->hostname.c_str(),
bmcUtil_getProtocol_str(host_ptr->protocol).c_str());
host_ptr->groups = 0 ;

View File

@ -160,7 +160,8 @@ using namespace std;
#define BMC_JSON__SENSORS_LABEL ((const char *)("sensors"))
#define IPMITOOL_SENSOR_QUERY_CMD ((const char *)(" sensor list"))
#define BMC_SENSOR_OUTPUT_FILE_SUFFIX ((const char *)("_sensor_data"))
#define BMC_POWER_SENSOR_OUTPUT_FILE_SUFFIX ((const char *)("_power_sensor_data"))
#define BMC_THERMAL_SENSOR_OUTPUT_FILE_SUFFIX ((const char *)("_thermal_sensor_data"))
#define REDFISHTOOL_READ_POWER_SENSORS_CMD ((const char *)("Chassis Power"))
#define REDFISHTOOL_READ_THERMAL_SENSORS_CMD ((const char *)("Chassis Thermal"))

View File

@ -45,9 +45,8 @@ int nodeLinkClass::bmc_command_send ( struct nodeLinkClass::node * node_ptr,
node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ;
node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ;
node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw ;
node_ptr->thread_extra_info.bm_type = node_ptr->bm_type ;
/* Special case handliong for Redfish Root (BMC) Query command.
/* Special case handling for Redfish Root (BMC) Query command.
* Current protocol override for this command that only applies
* to redfish and used for the bmc protocol learning process. */
if ( command == BMC_THREAD_CMD__BMC_QUERY )
@ -55,6 +54,26 @@ int nodeLinkClass::bmc_command_send ( struct nodeLinkClass::node * node_ptr,
else
node_ptr->bmc_thread_info.proto = node_ptr->bmc_protocol ;
if ( node_ptr->bmc_thread_info.proto == BMC_PROTOCOL__REDFISHTOOL )
{
/* build the reset/power control command */
switch (command)
{
case BMC_THREAD_CMD__POWER_RESET:
node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ;
node_ptr->bm_cmd.append(node_ptr->bmc_info.reset_action_list.front());
break ;
case BMC_THREAD_CMD__POWER_ON:
node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ;
node_ptr->bm_cmd.append(node_ptr->bmc_info.power_on_action_list.front());
break ;
case BMC_THREAD_CMD__POWER_OFF:
node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ;
node_ptr->bm_cmd.append(node_ptr->bmc_info.power_off_action_list.front());
break ;
}
node_ptr->thread_extra_info.bm_cmd = node_ptr->bm_cmd ;
}
#ifdef WANT_FIT_TESTING
{
bool want_fit = false ;
@ -170,39 +189,38 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr )
{
if ( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL )
{
/* handle the redfishtool root query as a special case because
* it is likely to fail and we don't want un-necessary error logs */
if ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY )
if (( rc = node_ptr->bmc_thread_info.status ) != PASS )
{
if (( rc = node_ptr->bmc_thread_info.status ) != PASS )
/* handle the redfishtool root query as a special case because
* it is likely to fail and we don't want un-necessary error logs */
if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY ) &&
(( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE )))
{
blog2 ("%s %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(),
node_ptr->bmc_thread_info.data.c_str(),
rc,
node_ptr->bmc_thread_info.status,
node_ptr->bmc_thread_info.status_string.c_str());
blog ("%s bmc redfish %s failed",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str());
}
else if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_INFO ) &&
(( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE )))
{
wlog ("%s bmc redfish %s failed",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str());
}
else
{
ilog("%s Redfish Root Query:\n%s",
node_ptr->hostname.c_str(),
node_ptr->bmc_thread_info.data.c_str());
elog ("%s bmc redfish %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(),
node_ptr->bmc_thread_info.data.c_str(),
rc,
node_ptr->bmc_thread_info.status,
node_ptr->bmc_thread_info.status_string.c_str());
}
}
else if (( rc = node_ptr->bmc_thread_info.status ) != PASS )
{
elog ("%s %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(),
node_ptr->bmc_thread_info.data.c_str(),
rc,
node_ptr->bmc_thread_info.status,
node_ptr->bmc_thread_info.status_string.c_str());
}
else
{
rc = PASS ;
@ -261,11 +279,11 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr )
}
else
{
blog ("%s %s Response: %s\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str(),
node_ptr->bmc_thread_info.data.c_str());
blog1 ("%s %s Response: %s\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str(),
node_ptr->bmc_thread_info.data.c_str());
}
}
}
@ -318,6 +336,10 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr )
}
/* handle max retries reached */
if ( rc == PASS )
{
;
}
else if ( node_ptr->bmc_thread_ctrl.retries++ >= BMC__MAX_RECV_RETRIES )
{
wlog ("%s %s command timeout (%d of %d)\n",
@ -334,18 +356,28 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr )
{
if ( node_ptr->bmc_thread_ctrl.id == 0 )
{
slog ("%s %s command not-running\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str());
/* don't log a warning for redfish query failures. */
if (( node_ptr->bmc_thread_info.command != BMC_THREAD_CMD__BMC_QUERY ) &&
( node_ptr->bmc_thread_info.command != BMC_THREAD_CMD__BMC_INFO ))
{
wlog ("%s %s command not-running\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str());
}
rc = FAIL_NOT_ACTIVE ;
}
else
{
ilog ("%s %s command in-progress (polling %d of %d)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
node_ptr->bmc_thread_ctrl.retries,
BMC__MAX_RECV_RETRIES);
/* The BMC is sometimes slow,
* No need to log till we reach half of the retry threshold */
if ( node_ptr->bmc_thread_ctrl.retries > (BMC__MAX_RECV_RETRIES/2) )
{
ilog ("%s %s command in-progress (polling %d of %d)\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
node_ptr->bmc_thread_ctrl.retries,
BMC__MAX_RECV_RETRIES);
}
rc = RETRY ;
}
}

View File

@ -486,7 +486,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
{
dlog ("%s Board Management Interface RESET Requested\n", node_ptr->hostname.c_str());
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY );
node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET_ACK;
break ;
}
@ -522,7 +522,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
rc = bmc_command_recv ( node_ptr );
if ( rc == RETRY )
{
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY );
break ;
}
@ -633,7 +633,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->cmdReq).c_str());
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY );
node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__IPMI_COMMAND_RECV ;
}
break ;

View File

@ -87,7 +87,7 @@ void mtcHttpSvr_fini ( event_type & mtce_event )
/************************************************************************************
*
* event_base_loopcontinue is not supported until version 2.1.2-alpha
* event_base_loopcontinue is not supported until version 2.1.2-alpha
* It allows processing of events in main loop instead of in the handler.
* Theoretically this would be nice to use in conjunction with
* event_base_loopexit in the selected fd
@ -281,26 +281,26 @@ void _create_error_response ( int rc , string & resp_buffer , node_inv_type & in
******************************************************************************/
/* Test Commands:
*
* Test 1: Select host, get uuid and make sure it is unlocked-enabled.
* Test 1: Select host, get uuid and make sure it is unlocked-enabled.
* Verify: Host should fail, reset and auto re-enable.
curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "failed"}'
* Test 2: Lock Host and issue command with correct uuids and hostname.
* Test 2: Lock Host and issue command with correct uuids and hostname.
* Verify: The host is rebooted/reset
curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "failed"}'
* Test 3:
* Test 3:
curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "degraded"}'
* Test 4:
* Test 4:
curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "cleared"}'
* Test 5: Unsuppored VIM Command
curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "degradeded"}'
*/
string mtcHttpSvr_vim_req ( char * buffer_ptr,
evhttp_cmd_type http_cmd,
string mtcHttpSvr_vim_req ( char * buffer_ptr,
evhttp_cmd_type http_cmd,
int & http_status_code )
{
nodeLinkClass * obj_ptr = get_mtcInv_ptr () ;
@ -311,7 +311,7 @@ string mtcHttpSvr_vim_req ( char * buffer_ptr,
int rc1 = jsonUtil_get_key_val ( buffer_ptr, MTC_JSON_SEVERITY, severity );
int rc2 = jsonUtil_get_key_val ( buffer_ptr, MTC_JSON_INV_NAME, hostname );
jlog ("%s '%s' request\n", hostname.c_str(), getHttpCmdType_str(http_cmd));
jlog ("%s '%s' request\n", hostname.c_str(), getHttpCmdType_str(http_cmd));
if ( rc1 | rc2 )
{
wlog ("Failed to parse command key values (%d:%d)\n", rc1, rc2);
@ -337,7 +337,7 @@ string mtcHttpSvr_vim_req ( char * buffer_ptr,
{
/* Test 1 */
ilog ("%s is now failed due to failed event (host is unlocked)\n", hostname.c_str());
obj_ptr->mtcInvApi_update_states ( hostname,
obj_ptr->mtcInvApi_update_states ( hostname,
get_adminState_str (MTC_ADMIN_STATE__UNLOCKED),
get_operState_str (MTC_OPER_STATE__DISABLED ),
get_availStatus_str(MTC_AVAIL_STATUS__FAILED));
@ -356,7 +356,7 @@ string mtcHttpSvr_vim_req ( char * buffer_ptr,
http_status_code = HTTP_BADMETHOD;
}
else if ( ! severity.compare("cleared"))
{
{
/* Test 4 */
ilog ("%s severity 'cleared' request not supported\n", hostname.c_str() );
response.append ("{\"status\" : \"fail\"");
@ -393,8 +393,8 @@ string mtcHttpSvr_vim_req ( char * buffer_ptr,
*
******************************************************************************/
string mtcHttpSvr_inv_req ( char * request_ptr,
evhttp_cmd_type event_type,
string mtcHttpSvr_inv_req ( char * request_ptr,
evhttp_cmd_type event_type,
int & http_status_code )
{
int rc = PASS ;
@ -426,7 +426,7 @@ string mtcHttpSvr_inv_req ( char * request_ptr,
hostname = inv.name ;
}
snprintf (&log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] http event seq: %d Payload:%s: %s",
snprintf (&log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] http event seq: %d Payload:%s: %s",
pt(), getpid(), sequence, hostname.data(), request_ptr);
send_log_message ( mtclogd_ptr, obj_ptr->my_hostname.data(), &filename[0], &log_str[0] );
@ -437,14 +437,14 @@ string mtcHttpSvr_inv_req ( char * request_ptr,
if ( rc == PASS )
{
ilog ("%s Add Operation\n", inv.name.c_str());
/* generate event=add alarm if the add_host returns a PASS */
mtcAlarm_log ( inv.name, MTC_LOG_ID__EVENT_ADD );
}
/* A RETRY return from add_host indicates that the node is
* already provisioned. At this point changes can only be
* implemented as modification so call mod_host
/* A RETRY return from add_host indicates that the node is
* already provisioned. At this point changes can only be
* implemented as modification so call mod_host
*/
if ( rc == RETRY )
{
@ -459,8 +459,8 @@ string mtcHttpSvr_inv_req ( char * request_ptr,
}
else
{
elog ("%s Inventory Add failed (%s)\n",
inv.name.length() ? inv.name.c_str() : "none",
elog ("%s Inventory Add failed (%s)\n",
inv.name.length() ? inv.name.c_str() : "none",
inv.uuid.c_str() );
_create_error_response ( rc , resp_buffer, inv ) ;
}
@ -685,8 +685,8 @@ string mtcHttpSvr_sm_req ( char * request_ptr,
*
* Verify this request contains valid client info.
*
* 1. the URL must have
* CLIENT_SYSINV_URL or
* 1. the URL must have
* CLIENT_SYSINV_URL or
* CLIENT_VIM_HOSTS_URL or
* CLIENT_VIM_SYSTEMS_URL
*
@ -703,16 +703,16 @@ mtc_client_enum _get_client_id ( struct evhttp_request *req )
* correct User-Agent header and supported version */
struct evkeyvalq * headers_ptr = evhttp_request_get_input_headers (req);
const char * header_value_ptr = evhttp_find_header (headers_ptr, CLIENT_HEADER);
if ( header_value_ptr )
if ( header_value_ptr )
{
const char * url_ptr = evhttp_request_get_uri (req);
hlog2 ("URI: %s\n", url_ptr );
if ( ! strncmp ( header_value_ptr, CLIENT_SYSINV_1_0, 20 ) )
{
hlog3 ("%s\n", header_value_ptr );
if ( strstr ( url_ptr, CLIENT_SYSINV_URL) )
{
client = CLIENT_SYSINV ;
@ -721,7 +721,7 @@ mtc_client_enum _get_client_id ( struct evhttp_request *req )
else if ( ! strncmp ( header_value_ptr, CLIENT_VIM_1_0, 20 ) )
{
hlog3 ("%s\n", header_value_ptr );
if ( strstr ( url_ptr, CLIENT_VIM_HOSTS_URL))
{
client = CLIENT_VIM_HOSTS ;
@ -762,14 +762,14 @@ mtc_client_enum _get_client_id ( struct evhttp_request *req )
void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
{
struct evbuffer *resp_buf ;
mtc_client_enum client = CLIENT_NONE ;
mtc_client_enum client = CLIENT_NONE ;
int http_status_code = HTTP_NOTFOUND ;
string service = "" ;
string uuid = "" ;
string response = "" ;
string hostname = "n/a" ;
UNUSED(arg);
UNUSED(arg);
response = "{" ;
response.append (" \"status\" : \"fail\"");
@ -792,7 +792,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
}
const char * url_ptr = evhttp_request_get_uri (req);
/* Extract the operation */
evhttp_cmd_type http_cmd = evhttp_request_get_command (req);
jlog ("%s request from '%s'\n", getHttpCmdType_str(http_cmd), host_ptr );
@ -814,7 +814,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
return ;
}
if (( client == CLIENT_VIM_HOSTS ) ||
if (( client == CLIENT_VIM_HOSTS ) ||
( client == CLIENT_VIM_SYSTEMS ))
{
service = "vim" ;
@ -830,7 +830,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
else
service = "unknown" ;
snprintf (&log_str[0], MAX_API_LOG_LEN-1, "\n%s [%5d] http event seq: %d with %s %s request from %s:%s",
snprintf (&log_str[0], MAX_API_LOG_LEN-1, "\n%s [%5d] http event seq: %d with %s %s request from %s:%s",
pt(), getpid(), ++sequence, service.c_str(), getHttpCmdType_str(http_cmd), host_ptr, url_ptr );
send_log_message ( mtclogd_ptr, obj_ptr->my_hostname.data(), &filename[0], &log_str[0] );
@ -842,7 +842,11 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
size_t len = strlen(CLIENT_SYSINV_URL) ;
uuid = (url_ptr+len) ;
hostname = obj_ptr->get_host(uuid) ;
if (( http_cmd == EVHTTP_REQ_GET ) && ( client == CLIENT_VIM_SYSTEMS ))
if ( hostname.empty() )
{
wlog("uuid to host lookup failed ; '%s' not found ", uuid.c_str());
}
else if (( http_cmd == EVHTTP_REQ_GET ) && ( client == CLIENT_VIM_SYSTEMS ))
{
http_status_code = obj_ptr->mtcVimApi_system_info ( response );
break ;
@ -852,7 +856,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
http_status_code = HTTP_OK ;
if ( uuid.length() != UUID_LEN )
{
wlog ("http '%s' request rejected, invalid uuid size (%ld:%s)\n",
wlog ("http '%s' request rejected, invalid uuid size (%ld:%s)\n",
getHttpCmdType_str(http_cmd),
uuid.length(), uuid.c_str());
response = "{" ;
@ -861,7 +865,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
response.append (",\"action\" : \"Undetermined\"");
response.append ("}");
http_status_code = HTTP_BADREQUEST ;
}
}
if (( http_cmd == EVHTTP_REQ_DELETE ) &&
(( hostname.length() == 0 ) || ( !hostname.compare("none"))))
{
@ -918,9 +922,9 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
{
ev_ssize_t bytes = 0 ;
char * buffer_ptr = (char*)malloc(len+1);
memset ( buffer_ptr, 0, len+1 );
memset ( buffer_ptr, 0, len+1 );
bytes = evbuffer_remove(in_buf, buffer_ptr, len );
if ( bytes <= 0 )
{
http_status_code = HTTP_BADREQUEST ;
@ -976,16 +980,17 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg)
resp_buf = evbuffer_new();
jlog ("Event Response: %s\n", response.c_str());
evbuffer_add_printf (resp_buf, "%s\n", response.data());
evhttp_send_reply (event_ptr->req, http_status_code, "OK", resp_buf );
evhttp_send_reply (event_ptr->req, http_status_code, "OK", resp_buf );
evbuffer_free ( resp_buf );
}
else
{
elog ("HTTP Event error:%d ; cmd:%s url:%s response:%s\n",
http_status_code,
getHttpCmdType_str(http_cmd),
url_ptr,
response.c_str());
wlog ("%s sending %d response for %s:%s request ; response:%s\n",
hostname.c_str(),
http_status_code,
getHttpCmdType_str(http_cmd),
url_ptr,
response.c_str());
evhttp_send_error (event_ptr->req, http_status_code, response.data() );
}
}
@ -1002,7 +1007,7 @@ int mtcHttpSvr_bind ( event_type & event )
int rc ;
int flags ;
int one = 1;
event.fd = socket(AF_INET, SOCK_STREAM, 0);
if (event.fd < 0)
{
@ -1012,13 +1017,13 @@ int mtcHttpSvr_bind ( event_type & event )
/* make socket reusable */
rc = setsockopt(event.fd, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(int));
memset(&event.addr, 0, sizeof(struct sockaddr_in));
event.addr.sin_family = AF_INET;
event.addr.sin_addr.s_addr = inet_addr(LOOPBACK_IP) ; /* INADDR_ANY; TODO: Refine this if we can */
// event.addr.sin_addr.s_addr = INADDR_ANY;
event.addr.sin_port = htons(event.port);
/* bind port */
rc = bind ( event.fd, (struct sockaddr*)&event.addr, sizeof(struct sockaddr_in));
if (rc < 0)
@ -1039,7 +1044,7 @@ int mtcHttpSvr_bind ( event_type & event )
flags = fcntl ( event.fd, F_GETFL, 0) ;
if ( flags < 0 || fcntl(event.fd, F_SETFL, flags | O_NONBLOCK) < 0)
{
elog ("failed to set HTTP server socket to non-blocking (%d:%m)\n", errno );
elog ("failed to set HTTP server socket to non-blocking (%d:%m)\n", errno );
return FAIL_SOCKET_OPTION;
}
@ -1082,7 +1087,7 @@ int mtcHttpSvr_setup ( event_type & event )
return -1;
}
evhttp_set_gencb(event.httpd, mtcHttpSvr_handler, NULL);
return PASS ;
}
@ -1104,7 +1109,7 @@ int mtcHttpSvr_init ( event_type & mtce_event )
}
else if ( mtce_event.fd > 0 )
{
ilog ("Listening On: 'http event server ' socket %s:%d\n",
ilog ("Listening On: 'http event server ' socket %s:%d\n",
inet_ntoa(mtce_event.addr.sin_addr), mtce_event.port );
rc = PASS ;
break ;

View File

@ -327,6 +327,7 @@ static int mtc_config_handler ( void * user,
else if (MATCH("agent", "ar_heartbeat_interval"))
mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = atoi(value);
else
{
return (PASS);
@ -431,6 +432,15 @@ static int mtc_ini_handler ( void * user,
}
}
}
else if (MATCH("agent", "bmc_access_method"))
{
string bmc_access_method_current = mtcInv.bmc_access_method ;
mtcInv.bmc_access_method = value ;
if ( mtcInv.bmc_access_method != bmc_access_method_current )
{
mtcInv.bmc_access_method_changed = true ;
}
}
return (PASS);
}
@ -672,6 +682,8 @@ int daemon_configure ( void )
ilog ("Controller : %s\n",
mtc_config.active ? "Active" : "In-Active" );
ilog ("BMC Access : %s", mtcInv.bmc_access_method.c_str());
/* remove any existing fit */
daemon_init_fit ();
@ -984,6 +996,10 @@ int daemon_init ( string iface, string nodetype )
return ( FAIL_DAEMON_CONFIG ) ;
}
/* bmc access method should not be considered changed if we
* are going through daemon_init ; i.e. process startup */
mtcInv.bmc_access_method_changed = false ;
return (rc);
}
@ -1185,6 +1201,23 @@ void nodeLinkClass::fsm ( void )
}
}
/* Push a daemon level BMC access method change down to the hosts.
 *
 * Walks the host list starting at 'head' and sets the per-host
 * 'bmc_access_method_changed' flag on every host whose BMC is
 * provisioned. Once the hosts are flagged, the daemon level change
 * flag in mtcInv is cleared. */
void nodeLinkClass::bmc_access_method_change_notifier ( void )
{
    /* an empty list (head == NULL) simply skips the loop */
    for ( struct node * host_ptr = head ;
                        host_ptr != NULL ;
                        host_ptr = host_ptr->next )
    {
        if ( host_ptr->bmc_provisioned )
        {
            host_ptr->bmc_access_method_changed = true ;
        }
    }

    /* change has been handed off to the provisioned hosts */
    mtcInv.bmc_access_method_changed = false ;
}
void daemon_service_run ( void )
{
int rc ;
@ -1563,6 +1596,11 @@ void daemon_service_run ( void )
ilog ("DOR mode disable\n");
mtcInv.dor_mode_active = false ;
}
if ( mtcInv.bmc_access_method_changed == true )
{
mtcInv.bmc_access_method_change_notifier();
}
}
daemon_exit ();
}

View File

@ -324,6 +324,19 @@ void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc)
}
}
/* is the bmc audit timer ? */
node_ptr = get_bmc_audit_timer ( *tid_ptr );
if ( node_ptr )
{
/* is this the bmc audit timer */
if ( *tid_ptr == node_ptr->bmc_audit_timer.tid )
{
mtcTimer_stop_int_safe ( node_ptr->bmc_audit_timer );
node_ptr->bmc_audit_timer.ring = true ;
return ;
}
}
/* is the host services handler timer ? */
node_ptr = get_host_services_timer ( *tid_ptr );
if ( node_ptr )
@ -2741,7 +2754,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr )
}
else
{
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY );
disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_POWERON_RECV) ;
}
@ -2879,7 +2892,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr )
}
else
{
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY );
disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_POWERON_RECV) ;
}
}
@ -4779,8 +4792,11 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr )
snprintf ( buffer, 255, MTC_TASK_POWEROFF_QUEUE, attempts, MTC_POWER_ACTION_RETRY_COUNT);
mtcInvApi_update_task ( node_ptr, buffer);
/* check the thread error status if thetre is one */
if ( node_ptr->bmc_thread_info.status )
/* Check the thread error status if there is one. Skip the
* typical system call log which just floods the log file.
* The failure is reported in the update task log above. */
if (( node_ptr->bmc_thread_info.status ) &&
( node_ptr->bmc_thread_info.status != FAIL_SYSTEM_CALL))
{
wlog ("%s ... %s (rc:%d)\n", node_ptr->hostname.c_str(),
node_ptr->bmc_thread_info.status_string.c_str(),
@ -4882,7 +4898,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr )
powerStageChange ( node_ptr , MTC_POWERON__POWER_STATUS_WAIT );
}
mtcTimer_reset ( node_ptr->mtcTimer );
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY );
break ;
}
case MTC_POWERON__POWER_STATUS_WAIT:
@ -4896,21 +4912,38 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr )
}
else if ( rc == PASS )
{
if ( node_ptr->bmc_thread_info.data.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos )
rc = bmcUtil_is_power_on ( node_ptr->hostname,
node_ptr->bmc_protocol,
node_ptr->bmc_thread_info.data,
node_ptr->power_on);
/* If there was an error in querying the power state,
* assume the power is off so that it will be powered on. */
if ( rc )
node_ptr->power_on = false ;
if ( node_ptr->power_on )
{
ilog ("%s power is already on ; no action required\n", node_ptr->hostname.c_str());
node_ptr->power_on = true ;
mtcInvApi_update_task ( node_ptr, "Power Already On" );
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY );
powerStageChange ( node_ptr , MTC_POWERON__DONE );
}
else
{
node_ptr->power_on = false ;
ilog ("%s power is off ; powering on ...\n", node_ptr->hostname.c_str() );
powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND );
}
}
/* failure path handling */
else if ( node_ptr->power_action_retries <= 0 )
{
wlog ("%s current power state query failed ; "
"proceeding with power-on",
node_ptr->hostname.c_str());
powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND );
node_ptr->power_action_retries = MTC_POWER_ACTION_RETRY_COUNT ;
}
else
{
powerStageChange ( node_ptr , MTC_POWERON__POWER_STATUS );
@ -4961,7 +4994,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr )
{
blog ("%s Power-On requested\n", node_ptr->hostname.c_str());
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY );
powerStageChange ( node_ptr , MTC_POWERON__RESP_WAIT );
}
@ -5194,7 +5227,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
{
if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) )
{
int delay = MTC_IPMITOOL_REQUEST_DELAY ;
int delay = MTC_BMC_REQUEST_DELAY ;
ilog ("%s querying current power state\n", node_ptr->hostname.c_str());
rc = bmc_command_send ( node_ptr, BMC_THREAD_CMD__POWER_STATUS );
@ -5246,22 +5279,14 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
}
else
{
bool on = false ;
ilog ("%s Power Status: %s\n",
node_ptr->hostname.c_str(),
node_ptr->bmc_thread_info.data.c_str());
if ( node_ptr->bmc_thread_info.data.find ( IPMITOOL_POWER_ON_STATUS ) != std::string::npos )
int status =
bmcUtil_is_power_on ( node_ptr->hostname,
node_ptr->bmc_protocol,
node_ptr->bmc_thread_info.data,
node_ptr->power_on);
if ( status == PASS )
{
on = true ;
}
if ( rc == PASS )
{
/* maintain current power state */
node_ptr->power_on = on ;
if ( on == true )
if ( node_ptr->power_on )
{
ilog ("%s invoking 'powerdown' phase\n", node_ptr->hostname.c_str());
@ -5277,9 +5302,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
}
else
{
/* TODO: use FAIL handler */
node_ptr->hwmon_powercycle.retries = MAX_POWERCYCLE_STAGE_RETRIES+1 ;
// powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL );
elog ("%s failed to query power status ; aborting powercycle action\n",
node_ptr->hostname.c_str());
}
@ -5303,7 +5326,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
case MTC_POWERCYCLE__POWEROFF:
{
int delay = MTC_IPMITOOL_REQUEST_DELAY ;
int delay = MTC_BMC_REQUEST_DELAY ;
/* Stop heartbeat if we are powering off the host */
send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST );
@ -5474,7 +5497,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
{
if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) )
{
int delay = MTC_IPMITOOL_REQUEST_DELAY ;
int delay = MTC_BMC_REQUEST_DELAY ;
clog ("%s %s stage\n", node_ptr->hostname.c_str(),
get_powercycleStages_str(node_ptr->powercycleStage).c_str());
@ -5551,7 +5574,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
else
{
wlog ("%s power status query requested\n", node_ptr->hostname.c_str() );
mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY );
powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON_VERIFY_WAIT );
}
}
@ -5561,8 +5584,6 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
{
if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) )
{
bool on = false ;
rc = bmc_command_recv ( node_ptr );
if ( rc == RETRY )
{
@ -5571,32 +5592,26 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr )
}
if ( rc == PASS )
{
if ( node_ptr->bmc_thread_info.data.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos )
{
on = true ;
}
rc = bmcUtil_is_power_on ( node_ptr->hostname,
node_ptr->bmc_protocol,
node_ptr->bmc_thread_info.data,
node_ptr->power_on);
}
ilog ("%s power state query result: %s\n",
node_ptr->hostname.c_str(),
node_ptr->bmc_thread_info.data.c_str() );
if (( rc == PASS ) && ( on == true ))
if (( rc == PASS ) && ( node_ptr->power_on ))
{
node_ptr->power_on = true ;
ilog ("%s is Powered On - waiting for 'online' (%d sec timeout)\n",
node_ptr->hostname.c_str(),
MTC_POWERON_TO_ONLINE_TIMEOUT);
mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_BOOT, node_ptr->hwmon_powercycle.attempts );
/* Set the online timeout */
mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_POWERON_TO_ONLINE_TIMEOUT );
powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON_WAIT );
}
else
{
wlog ("%s Power-On failed or did not occur ; retrying (rc:%d:%d)\n", node_ptr->hostname.c_str(), rc, on );
wlog ("%s Power-On failed or did not occur ; retrying (rc:%d)\n", node_ptr->hostname.c_str(), rc );
node_ptr->power_on = false ;
mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_RETRY, node_ptr->hwmon_powercycle.attempts );
mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_BM_POWERON_TIMEOUT );
@ -6177,13 +6192,46 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
{
if ( node_ptr->bmc_provisioned == true )
{
if (( node_ptr->bmc_accessible == true ) && ( node_ptr->bm_ping_info.ok == false ))
#ifdef WANT_FIT_TESTING
if (( node_ptr->bmc_accessible == true ) &&
( node_ptr->bm_ping_info.ok == true ) &&
( daemon_is_file_present ( MTC_CMD_FIT__JSON_LEAK_SOAK ) == true ))
{
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
#endif
/* Handle BMC access method changes */
if ( node_ptr->bmc_access_method_changed )
{
node_ptr->bmc_access_method_changed = false ;
ilog ("%s bmc access method change ; force %s",
node_ptr->hostname.c_str(),
this->bmc_access_method.c_str());
thread_kill ( node_ptr->bmc_thread_ctrl, node_ptr->bmc_thread_info );
bmc_access_data_init ( node_ptr );
pingUtil_fini ( node_ptr->bm_ping_info );
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ;
/* start a timer that will raise the BM Access alarm
* if we are not accessible by the time it expires */
mtcTimer_reset ( node_ptr->bm_timer );
mtcTimer_reset ( node_ptr->bmc_audit_timer );
mtcTimer_reset ( node_ptr->bmc_access_timer );
mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 );
}
if (( node_ptr->bmc_accessible == true ) &&
( node_ptr->bm_ping_info.ok == false ))
{
string bmc_info_filename = "" ;
wlog ("%s bmc access lost\n", node_ptr->hostname.c_str());
/* Be sure the BMC info file is removed.
* The 'hwmond' reads it and gets the bmc fw version from it. */
string bmc_info_filename = "" ;
if ( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL )
{
bmc_info_filename.append(REDFISHTOOL_OUTPUT_DIR) ;
@ -6207,18 +6255,23 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2);
mtcTimer_reset ( node_ptr->bmc_access_timer );
mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 );
mtcTimer_reset ( node_ptr->bmc_audit_timer );
}
/* manage bmc creds refresh ; not expected but should be handled */
if ( node_ptr->bm_ping_info.ok == false )
{
/* Auto correct key ping information ; should ever occur but if it does ... */
if (( node_ptr->bm_ping_info.hostname.empty()) || ( node_ptr->bm_ping_info.ip.empty()))
/* Auto correct key ping information ;
* should never occur but if it does ... */
if (( node_ptr->bm_ping_info.hostname.empty()) ||
( node_ptr->bm_ping_info.ip.empty()))
{
node_ptr->bm_ping_info.hostname = node_ptr->hostname ;
node_ptr->bm_ping_info.ip = node_ptr->bm_ip ;
}
}
/* manage getting the bm password */
if ( node_ptr->thread_extra_info.bm_pw.empty() )
{
barbicanSecret_type * secret = secretUtil_manage_secret( node_ptr->secretEvent,
@ -6235,7 +6288,7 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
/* If the BMC protocol has not yet been learned then do so.
* Default is ipmi unless the target host responds to a
* redfish root query with a minimum version number ; 1.0 */
else if (( node_ptr->bm_ping_info.ok ) &&
else if (( node_ptr->bm_ping_info.ok == true ) &&
( node_ptr->bmc_protocol_learned == false ))
{
if ( node_ptr->bmc_protocol_learning == false )
@ -6277,9 +6330,10 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
else if ( rc != PASS )
{
if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY ) &&
( rc == FAIL_SYSTEM_CALL ))
(( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE )))
{
wlog ("%s bmc does not support Redfish ; " \
/* TODO: may need retries */
plog ("%s bmc does not support Redfish ; " \
"defaulting to ipmi",
node_ptr->hostname.c_str());
}
@ -6292,9 +6346,9 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->bmc_thread_info.status,
node_ptr->bmc_thread_info.status_string.c_str());
}
node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
node_ptr->bmc_protocol_learning = false ;
node_ptr->bmc_protocol_learned = true ;
node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
node_ptr->bmc_thread_ctrl.done = true ;
}
else
@ -6316,13 +6370,12 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
{
node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
}
node_ptr->bmc_protocol_learned = true ;
ilog ("%s bmc supports %s",
blog ("%s bmc supports %s",
node_ptr->hostname.c_str(),
bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str());
node_ptr->bmc_protocol_learning = false ;
node_ptr->bmc_protocol_learned = true ;
node_ptr->bmc_thread_ctrl.done = true ;
}
}
@ -6339,10 +6392,10 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
( node_ptr->bmc_accessible == false ) &&
( node_ptr->bm_ping_info.ok == true ) &&
( node_ptr->bmc_info_query_done == false ) &&
( node_ptr->bmc_protocol_learned == true ) &&
( mtcTimer_expired (node_ptr->bm_timer ) == true ))
{
if (( node_ptr->bmc_info_query_active == false ) &&
( node_ptr->bmc_info_query_done == false ))
if ( node_ptr->bmc_info_query_active == false )
{
if ( bmc_command_send ( node_ptr, BMC_THREAD_CMD__BMC_INFO ) != PASS )
{
@ -6355,14 +6408,13 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
else
{
node_ptr->bmc_info_query_active = true ;
ilog ("%s bmc redfish '%s' in progress", /* ERIK: blog */
blog ("%s bmc redfish '%s' in progress",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str());
mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_FIRST_WAIT );
}
}
else if (( node_ptr->bmc_info_query_active == true ) &&
( node_ptr->bmc_info_query_done == false))
else if ( node_ptr->bmc_info_query_active == true )
{
int rc ;
if ( ( rc = bmc_command_recv ( node_ptr ) ) == RETRY )
@ -6397,15 +6449,18 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
else
{
mtcTimer_reset ( node_ptr->bm_timer );
mtcTimer_reset ( node_ptr->bmc_audit_timer );
mtcTimer_start ( node_ptr->bmc_audit_timer, mtcTimer_handler, MTC_MINS_2 );
plog ("%s bmc audit timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2);
#ifdef REDFISH_INTEGRATION_DONE
/* success path */
node_ptr->bmc_info_query_active = false ;
node_ptr->bmc_accessible = true ;
node_ptr->bmc_info_query_done = true ;
node_ptr->bmc_protocol = BMC_PROTOCOL__REDFISHTOOL ;
node_ptr->bmc_info_query_active = false ;
node_ptr->bmc_protocol_learning = false ;
node_ptr->bmc_protocol_learned = true ;
mtcTimer_reset ( node_ptr->bmc_access_timer );
node_ptr->bmc_accessible = true ;
/* save the host's power state */
node_ptr->power_on = node_ptr->bmc_info.power_on ;
@ -6413,25 +6468,12 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
plog ("%s bmc is accessible using redfish",
node_ptr->hostname.c_str());
/* tell the hardware monitor of the power state and protocol */
bmcUtil_hwmon_info ( node_ptr->hostname,
node_ptr->bmc_protocol,
node_ptr->power_on, "" );
node_ptr->bmc_protocol,
node_ptr->power_on, "" );
send_hwmon_command ( node_ptr->hostname, MTC_CMD_MOD_HOST );
#else
/* Redfish Power Control Commands not Implemented Yet
* Redfish not fully integrated.
* Need to continue to default to IPMI
*
* Start */
node_ptr->bmc_accessible = false ;
node_ptr->bmc_info_query_active = false ;
node_ptr->bmc_info_query_done = false ;
node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
/* End */
#endif
node_ptr->bmc_thread_ctrl.done = true ;
node_ptr->bmc_thread_info.command = 0 ;
@ -6640,6 +6682,112 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
pingUtil_acc_monitor ( node_ptr->bm_ping_info );
}
/* BMC Access Audit for Redfish.
* - used to refresh the host power state */
if (( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) &&
( node_ptr->bmc_provisioned ) &&
( node_ptr->bmc_accessible ) &&
( mtcTimer_expired ( node_ptr->bmc_audit_timer ) == true ) &&
( mtcTimer_expired ( node_ptr->bm_timer ) == true ))
{
if ( node_ptr->bmc_thread_ctrl.done )
{
/* send the BMC Info command */
if ( bmc_command_send ( node_ptr, BMC_THREAD_CMD__BMC_INFO ) != PASS )
{
elog ("%s bmc redfish '%s' send failed\n",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(
node_ptr->bmc_thread_info.command).c_str());
node_ptr->bm_ping_info.ok = false ;
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
else
{
blog1 ("%s bmc redfish '%s' audit in progress",
node_ptr->hostname.c_str(),
bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str());
mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT );
}
}
else if ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_INFO )
{
int rc ;
if ( ( rc = bmc_command_recv ( node_ptr ) ) == RETRY )
{
mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT );
}
else if ( rc != PASS )
{
wlog ("%s bmc audit failed receive (rc:%d)",
node_ptr->hostname.c_str(), rc );
node_ptr->bm_ping_info.ok = false ;
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
else if ( node_ptr->bmc_thread_info.data.empty())
{
wlog ("%s bmc audit failed get bmc query response data",
node_ptr->hostname.c_str());
node_ptr->bm_ping_info.ok = false ;
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
else
{
string filedata = daemon_read_file (node_ptr->bmc_thread_info.data.data()) ;
struct json_object *json_obj =
json_tokener_parse((char*)filedata.data());
if ( json_obj )
{
/* load the power state */
bool power_on ;
string power_state =
tolowercase(jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__POWER_STATE));
if ( power_state == BMC_POWER_ON_STATUS )
power_on = true ;
else if ( power_state == BMC_POWER_OFF_STATUS )
power_on = false ;
else
{
wlog ("%s bmc audit failed to get power state",
node_ptr->hostname.c_str());
node_ptr->bm_ping_info.ok = false ;
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
rc = FAIL_JSON_PARSE ;
}
if ( rc == PASS )
{
if ( power_on != node_ptr->power_on )
{
ilog ("%s power state changed to %s",
node_ptr->hostname.c_str(),
power_state.c_str());
/* tell the hardware monitor of the power state and protocol */
bmcUtil_hwmon_info ( node_ptr->hostname,
node_ptr->bmc_protocol,
power_on, "" );
send_hwmon_command ( node_ptr->hostname, MTC_CMD_MOD_HOST );
}
node_ptr->power_on = power_on ;
blog1 ("%s bmc audit timer re-started (%d secs)\n",
node_ptr->hostname.c_str(), MTC_MINS_2);
mtcTimer_start ( node_ptr->bmc_audit_timer,
mtcTimer_handler,
MTC_MINS_2 );
}
json_object_put(json_obj);
}
else
{
node_ptr->bm_ping_info.ok = false ;
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
wlog ("%s bmc audit failed parse bmc query response",
node_ptr->hostname.c_str());
}
}
}
}
/******************************************************************
* Manage the Board Management Access Alarm
******************************************************************/
@ -6652,11 +6800,11 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
/* start a timer that will raise the BM Access alarm
* if we are not accessible by the time it expires */
plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2);
mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 );
if ( node_ptr->alarms[MTC_ALARM_ID__BM] == FM_ALARM_SEVERITY_CLEAR )
{
plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2);
mtcAlarm_warning ( node_ptr->hostname, MTC_ALARM_ID__BM );
node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_WARNING ;
}
@ -6668,21 +6816,19 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
* ... if BMs are accessible then see if we need to clear the
* major BM Alarm.
*****************************************************************/
else if ( node_ptr->alarms[MTC_ALARM_ID__BM] != FM_ALARM_SEVERITY_CLEAR )
if (( node_ptr->bmc_accessible == true ) &&
( node_ptr->alarms[MTC_ALARM_ID__BM] != FM_ALARM_SEVERITY_CLEAR ) &&
((( node_ptr->bmc_protocol == BMC_PROTOCOL__IPMITOOL ) &&
( node_ptr->bmc_info_query_done == true ) &&
( node_ptr->reset_cause_query_done == true ) &&
( node_ptr->power_status_query_done == true )) ||
(( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) &&
( node_ptr->bmc_protocol_learned == true ))))
{
if ((( node_ptr->bmc_protocol == BMC_PROTOCOL__IPMITOOL ) &&
( node_ptr->bmc_info_query_done == true ) &&
( node_ptr->reset_cause_query_done == true ) &&
( node_ptr->power_status_query_done == true )) ||
(( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) &&
( node_ptr->bmc_protocol_learned == true )))
{
mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM );
node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_CLEAR ;
}
mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM );
node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_CLEAR ;
} /* else alarms already cleared */
} /* end if bmc_provisioned */
else if ( node_ptr->alarms[MTC_ALARM_ID__BM] != FM_ALARM_SEVERITY_CLEAR )
{
mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM );
@ -6885,6 +7031,15 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
}
case MTC_INSV_TEST__RUN:
{
if ( daemon_is_file_present ( MTC_CMD_FIT__BMC_ACC_FAIL ))
{
if ( node_ptr->bm_ping_info.ok )
{
ilog ("%s FIT failing bmc ping monitor", node_ptr->hostname.c_str());
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
}
#ifdef WANT_FIT_TESTING
daemon_load_fit ();
@ -6916,7 +7071,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ;
node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ;
node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw ;
node_ptr->thread_extra_info.bm_type = node_ptr->bm_type ;
node_ptr->bmc_thread_info.extra_info_ptr = &node_ptr->thread_extra_info ;
if ( thread_launch_thread ( mtcThread_bmc, &node_ptr->bmc_thread_info ) == 0 )
{

View File

@ -599,7 +599,7 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr )
}
else
{
wlog ("%s is ENABLED-degraded\n", name.c_str());
plog ("%s is ENABLED-degraded\n", name.c_str());
}
enableStageChange ( node_ptr, MTC_ENABLE__DONE );

View File

@ -38,7 +38,6 @@ using namespace std;
#include "mtcThreads.h" /* for ... IPMITOOL_THREAD_CMD__RESET ... */
#include "bmcUtil.h" /* for ... mtce-common bmc utility header */
/**************************************************************************
*
* Name : mtcThread_bmc
@ -111,6 +110,7 @@ void * mtcThread_bmc ( void * arg )
break ;
}
case BMC_THREAD_CMD__BMC_INFO:
case BMC_THREAD_CMD__POWER_STATUS:
{
command = REDFISHTOOL_BMC_INFO_CMD ;
suffix = BMC_INFO_FILE_SUFFIX ;
@ -120,26 +120,25 @@ void * mtcThread_bmc ( void * arg )
/* control commands */
case BMC_THREAD_CMD__POWER_RESET:
{
command = REDFISHTOOL_POWER_RESET_CMD ;
command = extra_ptr->bm_cmd ;
suffix = BMC_POWER_CMD_FILE_SUFFIX ;
break ;
}
case BMC_THREAD_CMD__POWER_ON:
{
command = REDFISHTOOL_POWER_ON_CMD ;
command = extra_ptr->bm_cmd ;
suffix = BMC_POWER_CMD_FILE_SUFFIX ;
break ;
}
case BMC_THREAD_CMD__POWER_OFF:
{
command = REDFISHTOOL_POWER_OFF_CMD ;
command = extra_ptr->bm_cmd ;
suffix = BMC_POWER_CMD_FILE_SUFFIX ;
break ;
}
case BMC_THREAD_CMD__BOOTDEV_PXE:
{
/* json response */
command = REDFISHTOOL_BOOTDEV_PXE_CMD ;
command = REDFISHTOOL_BOOTDEV_PXE_CMD ;
suffix = BMC_BOOTDEV_CMD_FILE_SUFFIX ;
break ;
}
@ -153,6 +152,7 @@ void * mtcThread_bmc ( void * arg )
}
}/* end redfishtool switch */
} /* end if */
/* IPMI */
else
{
switch ( info_ptr->command )
@ -308,17 +308,30 @@ void * mtcThread_bmc ( void * arg )
else
#endif
{
string chopped_request = bmcUtil_chop_system_req(request);
daemon_remove_file ( datafile.data() ) ;
blog_t("%s %s", info_ptr->hostname.c_str(), chopped_request.c_str());
/****** Make the system call ******/
rc =
threadUtil_bmcSystemCall (info_ptr->hostname,
request,
DEFAULT_SYSTEM_REQUEST_LATENCY_SECS);
nodeUtil_latency_log ( info_ptr->hostname, NODEUTIL_LATENCY_MON_START, 0 );
rc = system ( request.data()) ;
if ( rc != PASS )
{
if ( info_ptr->command != BMC_THREAD_CMD__BMC_QUERY )
/* Log the command that failed unless ...
* - it's the root query during learning
* - it's the typical failure to reach the BMC, whose
* error shows up as ENOENT or
* 'No such file or directory'
*/
if (( info_ptr->command != BMC_THREAD_CMD__BMC_QUERY ) &&
( errno != ENOENT ))
{
elog_t ("%s redfishtool system call failed (%s) (%d:%d:%m)\n",
elog_t ("%s system call failed [%s] (%d:%d:%m)\n",
info_ptr->hostname.c_str(),
request.c_str(),
chopped_request.c_str(),
rc, errno );
}
info_ptr->status = FAIL_SYSTEM_CALL ;
@ -328,8 +341,6 @@ void * mtcThread_bmc ( void * arg )
info_ptr->status_string = daemon_read_file(datafile.data());
}
}
/* produce latency log if command takes longer than 5 seconds */
nodeUtil_latency_log ( info_ptr->hostname, "redfishtool system call", 5000 );
}
#ifdef WANT_FIT_TESTING
@ -478,15 +489,29 @@ void * mtcThread_bmc ( void * arg )
else
#endif
{
daemon_remove_file ( datafile.data() ) ;
string chopped_request = bmcUtil_chop_system_req(request);
daemon_remove_file ( datafile.data() );
blog_t("%s %s", info_ptr->hostname.c_str(), chopped_request.c_str());
/****** Make the system call ******/
rc =
threadUtil_bmcSystemCall (info_ptr->hostname,
request,
DEFAULT_SYSTEM_REQUEST_LATENCY_SECS);
nodeUtil_latency_log ( info_ptr->hostname, NODEUTIL_LATENCY_MON_START, 0 );
rc = system ( request.data()) ;
if ( rc != PASS )
{
wlog_t ("%s ipmitool system call failed (%d:%d:%m)\n", info_ptr->hostname.c_str(), rc, errno );
elog_t ("%s system call failed [%s] (%d:%d:%m)\n",
info_ptr->hostname.c_str(),
chopped_request.c_str(),
rc, errno );
info_ptr->status = FAIL_SYSTEM_CALL ;
if ( daemon_is_file_present ( datafile.data() ))
{
/* load in the error. stdio is redirected to the datafile */
info_ptr->status_string = daemon_read_file(datafile.data());
}
}
nodeUtil_latency_log ( info_ptr->hostname, "ipmitool system call", 1000 );
}
#ifdef WANT_FIT_TESTING

View File

@ -19,8 +19,7 @@ typedef struct
string bm_ip ;
string bm_un ;
string bm_pw ;
string bm_type ;
string bm_cmd ;
} thread_extra_info_type ;
void * mtcThread_bmc ( void * );

View File

@ -64,7 +64,7 @@ ar_goenable_interval = 30
ar_hostservices_interval = 30
ar_heartbeat_interval = 600
api_retries = 10 ; number of API retries b4 failure
api_retries = 10 ; number of API retries b4 failure
[client] ; Client Configuration
scheduling_priority = 99 ; realtime scheduling; range of 1 .. 99 is acceptable