Ensure hbsClient ready event is cleared over a reboot.
A host sometimes (rarely) fails heartbeat immediately following unlock. The hbsClient sends its ready event every 5 seconds. Mtce uses this event message as a clue that the target host is ready to start heartbeat following Graceful Recovery or, in this case, the Enable sequence. This update fixes a potential race condition where the hbsClient ready event snuck through immediately following the unlock reboot. This tricked mtc into starting heartbeat too early after the online event that follows a reboot, which led to a heartbeat failure. Test Plan: PASS: compute system install PASS: standby controller lock/unlock soak (25 loops) PASS: 2 compute async lock/unlock soak (50 loops each) Regression: PASS: in-service heartbeat failure detection and handling Change-Id: I21699dbb2f0ab7355a9384d78b47a1fd1cea496d Closes-Bug: 1847656 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
cc69bf7b4a
commit
df50847580
@ -1042,8 +1042,6 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
node_ptr->goEnabled = false ;
|
||||
node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__NONE ;
|
||||
|
||||
clear_service_readies ( node_ptr );
|
||||
|
||||
/* Set uptime to zero in mtce and in the database */
|
||||
node_ptr->uptime_save = 0 ;
|
||||
set_uptime ( node_ptr, 0 , false );
|
||||
@ -1083,6 +1081,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
* have come in while we were purging */
|
||||
node_ptr->mtcAlive_online = false ;
|
||||
node_ptr->mtcAlive_offline = true ;
|
||||
clear_service_readies ( node_ptr );
|
||||
break ;
|
||||
}
|
||||
case MTC_ENABLE__MTCALIVE_WAIT:
|
||||
@ -1090,6 +1089,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
|
||||
/* search for the mtc alive message */
|
||||
if ( node_ptr->mtcAlive_online == true )
|
||||
{
|
||||
node_ptr->hbsClient_ready = false ;
|
||||
mtcTimer_reset ( node_ptr->mtcTimer );
|
||||
|
||||
/* Check to see if the host is/got configured correctly */
|
||||
|
Loading…
Reference in New Issue
Block a user