Add controller-0 to Mtce Heartbeat Service in AIO SX
All system types with the exception of AIO SX add controller-0 to the heartbeat service. There is no enabled heartbeating in AIO SX so controller-0 was never added. However, without being added, the alarms the hbsAgent raises are not cleared over a process startup. The local hbsClient was designed to monitor pmon, effectively monitoring the process monitor, and report to the hbsAgent its ongoing health state. This way, if pmond stops functioning, maintenance is able to alarm that condition. However, because in AIO SX controller-0 is never added to the heartbeat service, the current method of looping over the internal heartbeat service inventory clearing all the hbsAgent owned alarms for each host over a process restart is bypassed. So, in the failure mode where pmond is failing and the hbsAgent has raised an alarm against it, followed by a restart of the hbsAgent that coincides with 'pmond' process recovery, the pmond alarm gets stuck asserted. This update adds controller-0 to the heartbeat service inventory list for all system types so the hbsAgent managed alarms are cleared over a process restart regardless of the system type. Additionally, the following logging improvements were made: - add the network name to the heartbeat start log. - avoid heartbeat stop log when already stopped. Test Plan: PASS: Verify pmond alarm clears over hbsAgent process restart in AIO SX, AIO DX, Standard and Storage Systems. Regression: PASS: Verify Storage System Install and heartbeat PASS: Verify Standard System install and heartbeat PASS: Verify AIO DX install and heartbeat PASS: Verify AIO SX install and heartbeat PASS: Verify heartbeat logs and failure handling PEND: Verify update as a patch Change-Id: I9afd92a0b54296ef1f87ce7d912510649ae7560c Closes-Bug: 1904918 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
821f2840cc
commit
f00de2a311
|
@ -7746,7 +7746,11 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
|
|||
|
||||
if ( true_false == true )
|
||||
{
|
||||
ilog ("%s heartbeat start", hostname.c_str());
|
||||
ilog ("%s %s heartbeat %sstart",
|
||||
hostname.c_str(),
|
||||
get_iface_name_str(iface),
|
||||
node_ptr->monitor[iface] ? "re" : "");
|
||||
|
||||
node_ptr->no_work_log_throttle = 0 ;
|
||||
node_ptr->b2b_misses_count[iface] = 0 ;
|
||||
node_ptr->hbs_misses_count[iface] = 0 ;
|
||||
|
@ -7758,7 +7762,12 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
|
|||
}
|
||||
else
|
||||
{
|
||||
ilog ("%s heartbeat stop", hostname.c_str());
|
||||
if ( node_ptr->monitor[iface] == true )
|
||||
{
|
||||
ilog ("%s %s heartbeat stop",
|
||||
hostname.c_str(),
|
||||
get_iface_name_str(iface));
|
||||
}
|
||||
}
|
||||
node_ptr->monitor[iface] = true_false ;
|
||||
}
|
||||
|
@ -7771,7 +7780,7 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
|
|||
void nodeLinkClass::set_hwmond_monitor_state ( string & hostname, bool state )
|
||||
{
|
||||
if ( hostname.length() )
|
||||
{
|
||||
{
|
||||
struct nodeLinkClass::node* node_ptr ;
|
||||
node_ptr = nodeLinkClass::getNode ( hostname );
|
||||
if ( node_ptr != NULL )
|
||||
|
|
|
@ -1381,6 +1381,7 @@ int daemon_init ( string iface, string nodetype )
|
|||
hbs_ctrl.locked = true ;
|
||||
}
|
||||
|
||||
|
||||
daemon_init_fit();
|
||||
return (rc);
|
||||
}
|
||||
|
@ -1817,6 +1818,10 @@ void daemon_service_run ( void )
|
|||
inv.name = hbsInv.my_hostname ;
|
||||
inv.nodetype = CONTROLLER_TYPE ;
|
||||
hbsInv.add_heartbeat_host ( inv );
|
||||
|
||||
/* add this host to local inventory */
|
||||
hostname_inventory.push_front(hbsInv.my_hostname);
|
||||
ilog ("%s added to inventory (self)", hbsInv.my_hostname.c_str());
|
||||
}
|
||||
|
||||
/* enable the base level signal handler latency monitor */
|
||||
|
@ -2074,6 +2079,7 @@ void daemon_service_run ( void )
|
|||
inv.nodetype = msg.parm[0];
|
||||
hbsInv.add_heartbeat_host ( inv ) ;
|
||||
hostname_inventory.push_back ( inv.name );
|
||||
hostname_inventory.unique(); // avoid duplicates
|
||||
ilog ("%s added to heartbeat service (%d)\n",
|
||||
inv.name.c_str(),
|
||||
inv.nodetype);
|
||||
|
|
Loading…
Reference in New Issue