metal/mtce/src/hwmon/hwmonClass.h

612 lines
25 KiB
C++

#ifndef __INCLUDE_HWMONCLASS_H__
#define __INCLUDE_HWMONCLASS_H__
/*
* Copyright (c) 2015-2016 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
*/
#include "nodeBase.h" /* for ... */
#include "hostUtil.h" /* for ... server_enum */
#include "httpUtil.h" /* for ... libEvent */
#include "pingUtil.h" /* for ... ping */
#include "threadUtil.h" /* for ... thread_ctrl_type thread_info_type*/
#include "hwmon.h"
#include "hostClass.h"
#include "hwmonThreads.h"
#include "hwmonSensor.h"
//#include "hwmonIpmi.h" /* for ... sensor_data_type */
/**
 * Host delete stages.
 *
 * Enumerators are numbered consecutively from zero, so the trailing
 * HWMON_DEL__STAGES entry equals the number of real stages.
 */
typedef enum
{
    HWMON_DEL__START  = 0,
    HWMON_DEL__WAIT   = 1,
    HWMON_DEL__DONE   = 2,
    HWMON_DEL__STAGES = 3
} hwmon_delStages_enum ;
/**
 * hwmonHostClass
 *
 * Holds the hosts known to the hardware monitor. Each host is described by
 * a private hwmon_host record. Records carry prev/next pointers and the
 * class keeps head and tail pointers to that linked list, plus an array of
 * host pointers and allocation counters.
 *
 * Private section: the per-host record, host list helpers, FSM handlers and
 *                  the sensor / group model utilities.
 * Public section : host provisioning, sensor / group provisioning, query
 *                  and debug dump interfaces.
 *
 * Only declarations are visible in this header ; the implementation files
 * named in the comments below hold the behavior.
 */
class hwmonHostClass
{
private:
/** Per-host record ; one is kept for each host in the linked list */
struct hwmon_host {
string hostname ;
/** The IP address of the host's board management controller */
string bm_ip ;
/** The PW of the host's board management controller */
string bm_pw ;
/** A string label that represents the board management
* controller type for this host */
string bm_type ;
/** The operator provisioned board management hostname
 *
 * NOTE(review): the 'un' suffix suggests this may actually be the
 * board management user name rather than a hostname - confirm. */
string bm_un ;
bool bm_provisioned ;
int empty_secret_log_throttle ;
libEvent secretEvent ;
/** set true once a connection is established and
* set false when error recovery is performed on the connection
**/
bool connected ;
/** 'Connected' alternative (see above) for ipmi sensor monitoring.
* We don't 'connect' using ipmi sensor monitoring so a more
* representative word is introduced.
*
* The BMC is 'accessible' once provisioning data is available
* and bmc is verified pingable.
**/
bool accessible;
/** run the delete_handler FSM when set to true */
bool host_delete ;
/** general purpose retry counter */
int retries ;
/** true when host is degraded due to the inability to load group/sensor configuration */
bool degraded ;
/** true when the SENSORCFG alarm is raised due to the inability to load group/sensor configuration */
bool alarmed ;
/** true when sensor config alarm is raised */
bool alarmed_config ;
/* sensor audit interval */
int interval ;
int interval_old ; /* helps show interval change in log */
bool interval_changed ;
/* throttle degrade audit logs */
int degrade_audit_log_throttle ;
/** set to the protocol used to communicate with this server's BMC */
protocol_enum protocol ;
/** Pointer to the previous host in the list */
struct hwmon_host * prev;
/** Pointer to the next host in the list */
struct hwmon_host * next;
struct mtc_timer hostTimer ;
struct mtc_timer addTimer ;
struct mtc_timer secretTimer ;
bool monitor ; /* true if host's sensors are to be monitored */
/* set true by HWMON_SENSOR_MONITOR__POWER handling state before the
* sensor model has been learned. Being false provides hold off
* to learning the sensor model ; which will be created incorrectly
* if learned while the power is off */
bool poweron ;
/* SENSORS */
/* ------- */
/**** New Host Specific Private Constructs for IPMI Monitoring ****/
bool quanta_server ;
/* for bmc ping access monitor */
ping_info_type ping_info ;
/* Sensor Monitoring Thread Structs */
/* the info required by the sensor read thread to issue an ipmitool
* lanplus request to read sensors over the network */
thread_ctrl_type ipmitool_thread_ctrl ; /* control data used to manage the thread */
thread_info_type ipmitool_thread_info ; /* thread info used to execute and post results */
thread_extra_info_type thread_extra_info ; /* extra thread info for sensor monitoring */
/* Ipmi sensor monitoring control structure */
monitor_ctrl_type monitor_ctrl ;
/* number of sensor queries since last process restart */
int sensor_query_count ;
int want_degrade_audit ;
/* the last json string containing the last read sensor data */
string json_ipmi_sensors ;
int sensors ; /**< # of sensors in the sysinv database */
int samples ; /**< # of parsed samples from the reader thread */
/*
* The Main running Sensors Profile for this host.
* This list reflects what is in the sysinv database
* and shown in the UI.
*/
sensor_type sensor[MAX_HOST_SENSORS] ;
sensor_data_type sample[MAX_HOST_SENSORS] ; /* last read analog samples */
/*
* Sequential checksum of all the sensor names in the various
* sensor lists. See hwmonUtil.cpp for checksum utilities or
* hwmon.h for prototype
*/
unsigned short last_sample_sensor_checksum ;
unsigned short sample_sensor_checksum ;
unsigned short profile_sensor_checksum ;
/* GROUPS */
/* ------ */
/* number of sensor groups provisioned for this host */
int groups ;
/* list of groups for this host */
struct sensor_group_type group[MAX_HOST_GROUPS] ;
/* current group monitoring index ; used by the group monitor FSM */
int group_index ;
hwmon_addStages_enum addStage ;
hwmon_delStages_enum delStage ;
int group_mon_log_throttle ;
libEvent event ;
/* indicates whether the group/sensor accounting looks valid.
* i.e. number of sensors in sensor groups adds to equal total
* number of sensors */
bool accounting_ok ;
/* The number of sensor accounting errors , i.e. sensors not found,
* in the current sample set.
*
* If this count reaches MAX_SENSORS_NOT_FOUND then the
* accounting_bad_b4_reload_count below is incremented.
*
* Whenever all the sensors are found then
* this and the accounting_bad_b4_reload_count is cleared.
*
* NOTE(review): no accounting_bad_b4_reload_count member is declared
* in this struct ; the references above may be stale - confirm. */
int accounting_bad_count ;
/* string that represents the BMC firmware version */
string bmc_fw_version ;
/********** Sensor Model Relearn Handling Controls **********/
/* set to true when a new relearn request is received while not
* already in sensor model relearning mode */
bool relearn_request ;
/* true while in sensor model relearning mode */
bool relearn ;
/* a timer that forces exit from learn mode when it expires */
struct mtc_timer relearnTimer ;
/* Count relearn failure retries.
* Used to avoid repeating some retry operations. */
int relearn_retry_counter ;
/* Store the date/time when learning mode will be disabled.
* Put into error message to tell the administrator when the
* next sensor relearn is permitted when the current request
* is rejected due to already being in relearn mode. */
string relearn_done_date ;
/* a structure used to preserve some key sensor model attributes
* so that they can be restored over/after the relearn action */
model_attr_type model_attributes_preserved ;
};
/** List of allocated host memory.
*
* An array of host pointers.
*/
hwmon_host * host_ptrs[MAX_HOSTS] ;
/** A memory allocation counter.
*
* Should represent the number of hosts in the linked list.
*/
int memory_allocs ;
/** A memory used counter
*
* A variable storing the accumulated host memory
*/
int memory_used ;
struct hwmon_host * hwmon_head ; /**< Host Linked List Head pointer */
struct hwmon_host * hwmon_tail ; /**< Host Linked List Tail pointer */
/* Host record helpers ; bodies are not visible in this header.
 * Presumably these allocate, insert, look up and remove hwmon_host
 * records in the linked list above - confirm against hwmonClass.cpp */
struct hwmonHostClass::hwmon_host* newHost ( void );
struct hwmonHostClass::hwmon_host* addHost ( string hostname );
struct hwmonHostClass::hwmon_host* getHost ( string hostname );
int remHost ( string hostname );
int delHost ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr );
struct hwmonHostClass::hwmon_host* getHost_timer ( timer_t tid );
int set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr, bool state );
void clear_bm_assertions ( struct hwmonHostClass::hwmon_host * host_ptr );
void free_host_timers ( struct hwmonHostClass::hwmon_host * host_ptr );
/** typically called by an audit, this interface cycles through all
* the sensors looking for any that are in the degrade state and
* sends a degrade request to maintenance if it sees just 1 */
void degrade_state_audit ( struct hwmonHostClass::hwmon_host * host_ptr );
/* FSM handlers */
int add_host_handler ( struct hwmonHostClass::hwmon_host * host_ptr );
int group_mon_handler ( struct hwmonHostClass::hwmon_host * host_ptr );
/* in hwmonSensor.cpp */
int hwmon_load_sensors ( struct hwmonHostClass::hwmon_host * host_ptr , bool & error );
int hwmon_load_groups ( struct hwmonHostClass::hwmon_host * host_ptr , bool & error );
int load_profile_sensors ( struct hwmonHostClass::hwmon_host * host_ptr,
sensor_type * sensor_array_ptr, int max,
bool & error );
int load_profile_groups ( struct hwmonHostClass::hwmon_host * host_ptr,
struct sensor_group_type * group_array_ptr,int max ,
bool & error );
int hwmon_group_sensors ( struct hwmonHostClass::hwmon_host * host_ptr );
int delete_unwanted_sensors ( struct hwmonHostClass::hwmon_host * host_ptr );
/** Host add handler Stage Change member function */
int addStageChange ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr,
hwmon_addStages_enum newHdlrStage );
/** handles deleting a host from the hwmonHostClass object */
int delete_handler ( struct hwmonHostClass::hwmon_host * host_ptr );
void log_sensor_data ( struct hwmonHostClass::hwmon_host * host_ptr, string & sensorname, string data, string note);
void mem_log_info ( struct hwmonHostClass::hwmon_host * host_ptr );
void mem_log_options ( struct hwmonHostClass::hwmon_host * host_ptr );
void mem_log_bm ( struct hwmonHostClass::hwmon_host * host_ptr );
void mem_log_groups ( struct hwmonHostClass::hwmon_host * host_ptr );
void mem_log_threads ( struct hwmonHostClass::hwmon_host * host_ptr );
/************* New Private APIs for IPMI Sensor Monitoring **************/
/*************************************************************************
*
* Implemented in hwmonClass.cpp
*
*************************************************************************/
void ipmi_bmc_data_init ( struct hwmonHostClass::hwmon_host * host_ptr );
/***************************************************************************
*
* The following are sensor model provisioning APIs responsible for
* loading, creating and deleting sensor models wrt the sysinv database
* and hwmond.
*
* Implemented in hwmonModel.cpp
*
* ipmi_load_sensor_model - will load an existing sensor and group
* model from the database for the specified
* host into hwmond.
*
* ipmi_create_sensor_model - will create a new sensor and group model in
* the sysinv database for the specified host.
*
* ipmi_delete_sensor_model - will delete the sensor and group model from
* the sysinv database for the specified host.
*
* ipmi_create_sample_model - will create a sensor model based on sample
* data for the specified host.
*
* ipmi_create_quanta_model - will create a quanta server sensor group model
* for the specified host from sensor sample data.
*
*************************************************************************/
int ipmi_load_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr );
int ipmi_create_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr );
int ipmi_delete_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr );
int ipmi_create_sample_model ( struct hwmonHostClass::hwmon_host * host_ptr );
int ipmi_create_quanta_model ( struct hwmonHostClass::hwmon_host * host_ptr );
/*************************************************************************
*
* The following are sensor sample data management APIs
*
* File: hwmonIpmi.cpp
*
* ipmi_load_sensor_samples - loads the samples into the sample list.
*
* ipmi_update_sensors - updates hwmond with the latest sensor
* sample severity level for the specified host.
*
*************************************************************************/
int ipmi_load_sensor_samples ( struct hwmonHostClass::hwmon_host * host_ptr, char * msg_ptr );
int ipmi_update_sensors ( struct hwmonHostClass::hwmon_host * host_ptr );
/**************************************************************************
*
* Name : manage_startup_states
*
* Purpose: Manage how hwmon deals with sensor states over process startup.
*
* File : hwmonHdlr.cpp
*
* This code was taken from the add_handler and put into this stand
* alone procedure for code re-use so that it can be called by the add
* handler for ipmi without cloning it.
*
**************************************************************************/
bool manage_startup_states ( struct hwmonHostClass::hwmon_host * host_ptr );
/**************************************************************************
*
* Handle ipmitool monitoring audit interval changes where there is one
* interval for all sensor groups. Changing a single group's audit
* interval does so for all. All for 1 and one for all.
*
**************************************************************************/
int interval_change_handler( struct hwmonHostClass::hwmon_host * host_ptr );
/* The sensor monitor FSM */
int ipmi_sensor_monitor ( struct hwmonHostClass::hwmon_host * host_ptr );
/* Remove all groups / sensors from hwmon */
int hwmon_del_groups ( struct hwmonHostClass::hwmon_host * host_ptr );
int hwmon_del_sensors ( struct hwmonHostClass::hwmon_host * host_ptr );
/* Implemented in hwmonGroup.cpp */
/***************************************************************************
* Manage sensor group states in the database and hwmon as well
* and manage sensor group alarms. Since state changes affect alarming
* the two functions work well together.
***************************************************************************/
int ipmi_set_group_state ( struct hwmonHostClass::hwmon_host * host_ptr, string state );
/* Set all sensors to disabled-offline state/status */
int ipmi_disable_sensors ( struct hwmonHostClass::hwmon_host * host_ptr );
/****************************************************************************
* Create sensor groups in hwmon based on sample data using similar ipmi
* unit type canned groups and save those groups into the database.
****************************************************************************/
int ipmi_create_groups ( struct hwmonHostClass::hwmon_host * host_ptr );
/****************************************************************************
* Load the sensor samples into hwmon and then save them into the database.
****************************************************************************/
int ipmi_create_sensors ( struct hwmonHostClass::hwmon_host * host_ptr );
/*****************************************************************************
* Add a new group to hwmon and then to the sysinv database.
****************************************************************************/
int ipmi_add_group ( struct hwmonHostClass::hwmon_host * host_ptr ,
string datatype, string sensortype,
canned_group_enum grouptype,
string group_name, string path );
/****************************************************************************
* Put the current ipmi sensor list into the previously created sensor type
* based groups and save that grouping in the sysinv database.
*****************************************************************************/
int ipmi_group_sensors ( struct hwmonHostClass::hwmon_host * host_ptr );
/***************************************************************************
* Check whether the group/sensor accounting looks valid.
* i.e. number of sensors in sensor groups adds to equal total sensors.
**************************************************************************/
void check_accounting ( struct hwmonHostClass::hwmon_host * host_ptr );
/***************************************************************************
* Force monitoring to start now
**************************************************************************/
void monitor_now ( struct hwmonHostClass::hwmon_host * host_ptr );
/***************************************************************************
* Force monitoring to start soon ; called during sensor relearn request
* to give horizon time to show the deleted sensor model but not have
* the user wait for what might be a long audit interval before the
* refresh.
**************************************************************************/
void monitor_soon ( struct hwmonHostClass::hwmon_host * host_ptr );
/**************************************************************************
* Save and restore structure and utilities for preserving audit
* interval and group actions over a sensor relearn.
**************************************************************************/
void save_model_attributes ( struct hwmonHostClass::hwmon_host * host_ptr );
void restore_group_actions ( struct hwmonHostClass::hwmon_host * host_ptr,
struct sensor_group_type * group_ptr );
/*************************************************************************/
void sensorState_print_debug ( struct hwmonHostClass::hwmon_host * host_ptr, string sensorname, string proc, int line );
public:
hwmonHostClass(); /**< constructor */
~hwmonHostClass(); /**< destructor */
hostBaseClass hostBase ;
system_type_enum system_type ;
void timer_handler ( int sig, siginfo_t *si, void *uc);
/** This is a list of host names. */
std::list<string> hostlist ;
std::list<string>::iterator hostlist_iter_ptr ;
void hwmon_fsm ( void );
bool is_bm_provisioned ( string hostname );
string get_bm_ip ( string hostname );
string get_bm_type ( string hostname );
string get_bm_un ( string hostname );
string get_hostname ( string uuid ); /**< lookup hostname from the host uuid */
string get_relearn_done_date ( string hostname );
int hosts ;
/* This bool is set in the daemon_configure case to inform the
* FSM that there has been a configuration reload.
* The initial purpose of this bool is to trigger a full sensor
* dump of all hosts on demand */
bool config_reload ;
/********* New Public Constructs for IPMI Sensor Monitoring ***********/
/* set to true once a host has been deleted. This will cause the FSM to
* kick out of the host list to be restarted without this host in it
* any more */
bool host_deleted ;
/* sets the want_degrade_audit = true for all hosts
 *
 * NOTE(review): want_degrade_audit is declared as an int in hwmon_host */
void set_degrade_audit ( void );
/************************************************************************/
int add_host ( node_inv_type & inv );
int mod_host ( node_inv_type & inv );
int del_host ( string hostname );
int rem_host ( string hostname );
int mon_host ( string hostname, bool monitor );
int request_del_host ( string hostname );
int ipmi_learn_sensor_model ( string uuid );
/****************************************************************************
*
* Name: get_sensor
*
* Description: Returns a pointer to the host sensor
* that matches the supplied sensor name.
*
****************************************************************************/
sensor_type * get_sensor ( string hostname, string sensorname );
/****************************************************************************
*
* Name: add_sensor
*
* Description: If the return code is PASS then the supplied sensor is
* provisioned against this host. If the sensor already exists
* then it is updated with all the new information. Otherwise
* (normally) a new sensor is added.
*
****************************************************************************/
int add_sensor ( string hostname, sensor_type & sensor );
/****************************************************************************
*
* Name: add_sensor_uuid
*
* Description: Adds the sysinv supplied sensor uuid to hwmon for
* the specified sensor/host.
*
****************************************************************************/
int add_sensor_uuid ( string & hostname, string & name, string & uuid );
/****************************************************************************
*
* Name: hwmon_get_group
*
* Description: Returns a pointer to the host sensor group
* that matches the supplied sensor group name.
****************************************************************************/
struct sensor_group_type * hwmon_get_group ( string hostname, string group_name );
/****************************************************************************
*
* Name: hwmon_get_sensorgroup
*
* Description: Returns a pointer to the host sensor group
* that matches the supplied sensor name.
****************************************************************************/
struct sensor_group_type * hwmon_get_sensorgroup ( string hostname, string sensorname );
/****************************************************************************
*
* Name: hwmon_add_group
*
* Description: If the return code is PASS then the supplied sensor group is
* provisioned against this host. If the group already exists
* then it is updated with all the new information. Otherwise
* (normally) a new group is added to the hwmon class struct.
*
****************************************************************************/
int hwmon_add_group ( string hostname, struct sensor_group_type & sensor_group );
/****************************************************************************
*
* Name: add_group_uuid
*
* Description: Adds the sysinv supplied group uuid to hwmon for
* the specified group/host.
*
****************************************************************************/
int add_group_uuid ( string & hostname, string & name, string & uuid );
int group_modify ( string hostname, string group, string field, string value );
/* TODO: make this a struct hwmonHostClass::hwmon_host * host_ptr */
int manage_sensor_state ( string & hostname, sensor_type * sensor, sensor_severity_enum severity );
void memLogDelimit ( void ); /**< Debug log delimiter */
void memDumpNodeState ( string hostname );
void memDumpAllState ( void );
void print_node_info ( void ); /**< Print node info banner */
/************ New Public API for IPMI Sensor Monitoring *************/
/* Sets a flag that indicates the sensor audit interval has changed.
*
* The DELAY phase of sensor monitoring will look at and will handle
* the change as a background operation. */
void audit_interval_change ( string hostname );
/* Sets host_ptr->interval to the specified value and sets a flag
* that indicates the sensor audit interval has changed.
*
* The DELAY phase of sensor monitoring will look at the flag this
* API sets and will handle the change as a background operation. */
void modify_audit_interval ( string hostname , int interval );
/************************************************************************/
};
/**
 * Accessor returning a pointer to a hwmonHostClass object.
 *
 * NOTE(review): presumably the single process-wide instance ; the
 * definition is not visible in this header - confirm.
 */
hwmonHostClass *get_hwmonHostClass_ptr(void);
#endif /* __INCLUDE_HWMONCLASS_H__ */