Fix heartbeat messaging when interface is set to 'lo'
The maintenance heartbeat service should not use multicast messaging over an 'lo' interface, which in IPv6 leads to socket failures, log flooding and the inability to detect and report pmond process failure. To fix that, this update: - configures pulse messaging to unicast for monitored networks configured as 'lo'. - prevents heartbeating over the cluster network if it and the management network are both configured on the 'lo' interface. - improves logging to avoid flooding in the presence of socket setup or access errors. - stops logging netlink events (interface state changes) on unmonitored network interfaces. - maintains the heartbeat disabled state until the management network is up. - modifies hbsAgent socket failure handling and its pmon conf file so that a persistent socket failure during startup is alarmed as an hbsAgent process failure. Test Plan: PASS: Verify logging over system install and socket errors PASS: Verify unicast messaging when cluster is set to 'lo' PASS: Verify no cluster network heartbeat when it and management are set to 'lo'. Regression: PASS: Verify heartbeat messaging and cluster info PASS: Verify pmond process failure alarm management PASS: Verify heartbeat failure detection and graceful recovery PASS: Verify AIO SX IPv6 system install and run PASS: Verify AIO DX IPv6 system install and run PASS: Verify Standard IPv6 system install and run PASS: Verify Storage system IPv6 install and run PASS: Verify Storage system IPv4 install and run PASS: Verify MNFA handling in IPv6 storage system Change-Id: I5a2a0b2dee0c690617c4e0b0e2ab8b1172b2dc49 Closes-Bug: 1884585 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
fe8dd6d6f4
commit
55d5f43edb
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013, 2015 Wind River Systems, Inc.
|
||||
* Copyright (c) 2013-2020 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
@ -187,6 +187,7 @@ void log_link_events ( int netlink_sock,
|
||||
iter_curr_ptr != links_gone_down.end() ;
|
||||
iter_curr_ptr++ )
|
||||
{
|
||||
bool care = false ;
|
||||
dlog3 ( "downed link: %s (running:%d:%d)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
mgmnt_link_up_and_running,
|
||||
@ -194,6 +195,7 @@ void log_link_events ( int netlink_sock,
|
||||
|
||||
if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data()))
|
||||
{
|
||||
care = true ;
|
||||
if ( mgmnt_link_up_and_running == true )
|
||||
{
|
||||
mgmnt_link_up_and_running = false ;
|
||||
@ -202,6 +204,7 @@ void log_link_events ( int netlink_sock,
|
||||
}
|
||||
if ( !strcmp (clstr_iface_ptr, iter_curr_ptr->data()))
|
||||
{
|
||||
care = true ;
|
||||
if ( clstr_link_up_and_running == true )
|
||||
{
|
||||
clstr_link_up_and_running = false ;
|
||||
@ -209,18 +212,21 @@ void log_link_events ( int netlink_sock,
|
||||
}
|
||||
}
|
||||
|
||||
if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS )
|
||||
if ( care == true )
|
||||
{
|
||||
wlog ("%s is down (oper:%s) (%ld)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
running ? "up" : "down",
|
||||
iter_curr_ptr->length() );
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("%s is down (driver query failed) (len:%ld)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
iter_curr_ptr->length() );
|
||||
if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS )
|
||||
{
|
||||
wlog ("%s is down (oper:%s) (%ld)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
running ? "up" : "down",
|
||||
iter_curr_ptr->length());
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("%s is down (driver query failed) (len:%ld)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
iter_curr_ptr->length());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -233,6 +239,7 @@ void log_link_events ( int netlink_sock,
|
||||
iter_curr_ptr != links_gone_up.end() ;
|
||||
iter_curr_ptr++ )
|
||||
{
|
||||
bool care = false ;
|
||||
dlog3 ( "recovered link: %s (running:%d:%d)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
mgmnt_link_up_and_running,
|
||||
@ -240,27 +247,32 @@ void log_link_events ( int netlink_sock,
|
||||
|
||||
if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data()))
|
||||
{
|
||||
care = true ;
|
||||
mgmnt_link_up_and_running = true ;
|
||||
wlog ("Mgmnt link %s is up\n", mgmnt_iface_ptr );
|
||||
}
|
||||
if ( !strcmp (clstr_iface_ptr, iter_curr_ptr->data()))
|
||||
{
|
||||
care = true ;
|
||||
clstr_link_up_and_running = true ;
|
||||
wlog ("Cluster-host link %s is up\n", clstr_iface_ptr );
|
||||
}
|
||||
|
||||
if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS )
|
||||
if ( care == true )
|
||||
{
|
||||
wlog ("%s is up (oper:%s) (len:%ld)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
running ? "up" : "down",
|
||||
iter_curr_ptr->length() );
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("%s is up (driver query failed) (len:%ld)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
iter_curr_ptr->length() );
|
||||
if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS )
|
||||
{
|
||||
wlog ("%s is up (oper:%s) (len:%ld)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
running ? "up" : "down",
|
||||
iter_curr_ptr->length() );
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("%s is up (driver query failed) (len:%ld)\n",
|
||||
iter_curr_ptr->c_str(),
|
||||
iter_curr_ptr->length() );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef __INCLUDE_NODEBASE_HH__
|
||||
#define __INCLUDE_NODEBASE_HH__
|
||||
/*
|
||||
* Copyright (c) 2013-2016 Wind River Systems, Inc.
|
||||
* Copyright (c) 2013-2020 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
@ -161,10 +161,10 @@ typedef enum
|
||||
|
||||
|
||||
/** 'lo' interface IP address - TODO: get it from the interface */
|
||||
#define LOOPBACK_IP "127.0.0.1"
|
||||
#define LOCALHOST "localhost"
|
||||
#define LOOPBACK_IP "127.0.0.1"
|
||||
#define LOOPBACK_IPV6 "::1"
|
||||
#define LOCALHOST "localhost"
|
||||
|
||||
#define LOOPBACK_IF "lo"
|
||||
|
||||
#define CLUSTER_HOST_SUFFIX ((const char*)("-cluster-host"))
|
||||
|
||||
|
@ -5,8 +5,8 @@ pidfile = /var/run/hbsAgent.pid
|
||||
style = lsb ; ocf or lsb
|
||||
severity = major ; minor, major, critical
|
||||
restarts = 1 ; restart retries before error assertion
|
||||
interval = 10 ; number of seconds to wait between restarts
|
||||
debounce = 10 ; number of seconds that a process needs to remain
|
||||
interval = 5 ; number of seconds to wait between restarts
|
||||
debounce = 20 ; number of seconds that a process needs to remain
|
||||
; running before degrade is removed and retry count
|
||||
; is cleared.
|
||||
startuptime = 5 ; Seconds to wait after process start before starting the debounce monitor
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
|
||||
* Copyright (c) 2013-2020 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
@ -75,6 +75,10 @@ static string arrival_histogram[MAX_IFACES] = { "" , "" } ;
|
||||
static string mtcAgent_ip = "" ;
|
||||
static std::list<string> hostname_inventory ;
|
||||
|
||||
/* Used to throttle warning messages that report
|
||||
* an error transmitting the pulse request */
|
||||
static int pulse_request_fail_log_counter[MAX_IFACES] ;
|
||||
|
||||
/** This heartbeat service inventory is tracked by
|
||||
* the same nodeLinkClass that maintenance uses.
|
||||
*
|
||||
@ -449,7 +453,15 @@ int daemon_configure ( void )
|
||||
ilog ("Clstr Addr : %s\n", hbsInv.my_clstr_ip.c_str());
|
||||
}
|
||||
|
||||
if (!strcmp(hbs_config.clstr_iface, hbs_config.mgmnt_iface))
|
||||
/* The cluster host network is considered unprovisioned
|
||||
* for heartbeat while ...
|
||||
* ... its interface is 'lo' ... */
|
||||
if (!strcmp(hbs_config.clstr_iface, LOOPBACK_IF))
|
||||
{
|
||||
hbsInv.clstr_network_provisioned = false ;
|
||||
}
|
||||
/* ... or it and the management interface are the same. */
|
||||
else if (!strcmp(hbs_config.clstr_iface, hbs_config.mgmnt_iface))
|
||||
{
|
||||
hbsInv.clstr_network_provisioned = false ;
|
||||
}
|
||||
@ -551,20 +563,7 @@ int _setup_pulse_messaging ( iface_enum i, int rmem_max )
|
||||
int rc = PASS ;
|
||||
char * iface = NULL ;
|
||||
|
||||
/* Load up the interface name */
|
||||
if ( i == MGMNT_IFACE )
|
||||
{
|
||||
iface = hbs_config.mgmnt_iface ;
|
||||
}
|
||||
else if (( i == CLSTR_IFACE ) && ( hbs_config.clstr_iface != NULL ))
|
||||
{
|
||||
iface = hbs_config.clstr_iface ;
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("No Cluster-host Interface\n");
|
||||
return (RETRY);
|
||||
}
|
||||
pulse_request_fail_log_counter[i] = 0 ;
|
||||
|
||||
/* Start by closing existing sockets just in case this is a (re)initialization */
|
||||
if ( hbs_sock.rx_sock[i] )
|
||||
@ -579,13 +578,60 @@ int _setup_pulse_messaging ( iface_enum i, int rmem_max )
|
||||
hbs_sock.tx_sock[i] = 0 ;
|
||||
}
|
||||
|
||||
/* Load up the interface name */
|
||||
if ( i == MGMNT_IFACE )
|
||||
{
|
||||
if ( hbsInv.mgmnt_link_up_and_running == false )
|
||||
{
|
||||
wlog("Cannot setup Mgmnt pulse messaging when '%s' interface is down", hbs_config.clstr_iface );
|
||||
return(FAIL_BAD_STATE);
|
||||
}
|
||||
else
|
||||
{
|
||||
iface = hbs_config.mgmnt_iface ;
|
||||
if (strcmp(iface, LOOPBACK_IF))
|
||||
{
|
||||
hbs_sock.tx_sock[i] =
|
||||
new msgClassTx(hbs_config.multicast,hbs_sock.tx_port[i],IPPROTO_UDP,iface);
|
||||
}
|
||||
else
|
||||
{
|
||||
hbs_sock.tx_sock[i] =
|
||||
new msgClassTx(hbsInv.my_local_ip.data(), hbs_sock.tx_port[i],IPPROTO_UDP,iface);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (( i == CLSTR_IFACE ) &&
|
||||
( hbsInv.clstr_network_provisioned == true ) &&
|
||||
( hbs_config.clstr_iface != NULL ))
|
||||
{
|
||||
if ( hbsInv.clstr_link_up_and_running == false )
|
||||
{
|
||||
wlog("Cannot setup Clstr pulse messaging when '%s' interface is down", hbs_config.clstr_iface);
|
||||
return(FAIL_BAD_STATE);
|
||||
}
|
||||
else
|
||||
{
|
||||
iface = hbs_config.clstr_iface ;
|
||||
hbs_sock.tx_sock[i] =
|
||||
new msgClassTx(hbs_config.multicast,hbs_sock.tx_port[i],IPPROTO_UDP,iface);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog("no heartbeat on %s network", get_iface_name_str(i) );
|
||||
return (PASS);
|
||||
}
|
||||
|
||||
/* Create transmit socket */
|
||||
hbs_sock.tx_sock[i] = new msgClassTx(hbs_config.multicast,hbs_sock.tx_port[i],IPPROTO_UDP,iface);
|
||||
if ( hbs_sock.tx_sock[i] )
|
||||
{
|
||||
if ( hbs_sock.tx_sock[i]->return_status != PASS )
|
||||
{
|
||||
elog("Cannot open multicast transmit socket - rc:%d (%d:%m)\n", hbs_sock.tx_sock[i]->return_status, errno );
|
||||
elog("Failed to create %s pulse transmit socket (%d:%d:%m)\n",
|
||||
get_iface_name_str(i),
|
||||
hbs_sock.tx_sock[i]->return_status,
|
||||
errno );
|
||||
delete (hbs_sock.tx_sock[i]);
|
||||
hbs_sock.tx_sock[i] = 0 ;
|
||||
return (FAIL_SOCKET_CREATE);
|
||||
@ -597,10 +643,10 @@ int _setup_pulse_messaging ( iface_enum i, int rmem_max )
|
||||
}
|
||||
else
|
||||
{
|
||||
elog("Cannot open multicast transmit socket - null object (%d:%m)\n", errno );
|
||||
elog("Failed to create %s pulse transmit socket (%d:%m)\n",
|
||||
get_iface_name_str(i), errno );
|
||||
return (FAIL_SOCKET_CREATE);
|
||||
}
|
||||
dlog("Opened multicast transmit socket\n" );
|
||||
|
||||
/* In order to avoid multicast packets being routed wrong, force sending from that socket */
|
||||
hbs_sock.tx_sock[i]->interfaceBind();
|
||||
@ -614,8 +660,10 @@ int _setup_pulse_messaging ( iface_enum i, int rmem_max )
|
||||
hbs_sock.rx_sock[i] = new msgClassRx(hbs_config.multicast,hbs_sock.rx_port[i],IPPROTO_UDP,iface,true);
|
||||
if (( hbs_sock.rx_sock[i] == NULL ) || (hbs_sock.rx_sock[i]->return_status != PASS ))
|
||||
{
|
||||
elog("Failed opening pulse receive socket (%d:%s)\n",
|
||||
errno, strerror (errno));
|
||||
elog("Failed to create %s pulse receive socket (%d:%d:%m)\n",
|
||||
get_iface_name_str(i),
|
||||
hbs_sock.rx_sock[i]->return_status,
|
||||
errno );
|
||||
rc = FAIL_SOCKET_CREATE ;
|
||||
}
|
||||
else
|
||||
@ -948,7 +996,11 @@ int hbs_pulse_request ( iface_enum iface,
|
||||
|
||||
if ( (bytes = hbs_sock.tx_sock[iface]->write((char*)&hbs_sock.tx_mesg[iface], bytes)) < 0 )
|
||||
{
|
||||
elog("Failed to send Pulse request: %d:%s to %s.%d (rc:%i ; %d:%s)\n",
|
||||
/* Throttle this error log. */
|
||||
elog_throttled( pulse_request_fail_log_counter[iface], 100,
|
||||
"Failed to send %s Pulse request: " \
|
||||
"%d:%s to %s.%d (rc:%i ; %d:%s)\n",
|
||||
get_iface_name_str(iface),
|
||||
hbs_sock.tx_mesg[iface].s,
|
||||
&hbs_sock.tx_mesg[iface].m[0],
|
||||
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
||||
@ -959,7 +1011,9 @@ int hbs_pulse_request ( iface_enum iface,
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog("Unable to send pulse request - null tx object - auto re-init pending\n");
|
||||
elog_throttled( pulse_request_fail_log_counter[iface], 100,
|
||||
"Unable to send %s pulse request on null socket",
|
||||
get_iface_name_str(iface));
|
||||
return (FAIL_SOCKET_SENDTO);
|
||||
}
|
||||
|
||||
@ -1448,10 +1502,6 @@ void daemon_service_run ( void )
|
||||
* ultimately triggers an exit if that retry count gets too big */
|
||||
int socket_init_fail_count = 0 ;
|
||||
|
||||
/* Used to throttle warning messages that report
|
||||
* an error transmitting the pulse request */
|
||||
int pulse_request_fail_log_counter[MAX_IFACES] ;
|
||||
|
||||
/* throttle initialization wait logs */
|
||||
int wait_log_throttle = 0 ;
|
||||
|
||||
@ -1561,18 +1611,46 @@ void daemon_service_run ( void )
|
||||
daemon_exit();
|
||||
}
|
||||
|
||||
if ( get_link_state ( hbs_sock.ioctl_sock, hbs_config.mgmnt_iface, &hbsInv.mgmnt_link_up_and_running ) )
|
||||
{
|
||||
hbsInv.mgmnt_link_up_and_running = false ;
|
||||
wlog ("Failed to query %s operational state ; defaulting to down\n", hbs_config.mgmnt_iface );
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog ("Mgmnt %s link is %s\n", hbs_config.mgmnt_iface, hbsInv.mgmnt_link_up_and_running ? "Up" : "Down" );
|
||||
}
|
||||
|
||||
if ( hbsInv.clstr_network_provisioned == true )
|
||||
{
|
||||
if ( get_link_state ( hbs_sock.ioctl_sock, hbs_config.clstr_iface, &hbsInv.clstr_link_up_and_running ) )
|
||||
{
|
||||
hbsInv.clstr_link_up_and_running = false ;
|
||||
wlog ("Failed to query %s operational state ; defaulting to down\n", hbs_config.clstr_iface );
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog ("Cluster-host %s link is %s\n", hbs_config.clstr_iface, hbsInv.clstr_link_up_and_running ? "Up" : "Down" );
|
||||
}
|
||||
}
|
||||
|
||||
/* Setup the heartbeat sockets */
|
||||
if ( (rc = hbs_socket_init ()) != PASS )
|
||||
{
|
||||
if ( socket_init_fail_count++ == 10 )
|
||||
#define HBS_SOCKET_INIT_RETRY_THRESHOLD (3)
|
||||
#define HBS_SOCKET_INIT_RETRY_INTERVAL (2)
|
||||
if ( socket_init_fail_count++ == HBS_SOCKET_INIT_RETRY_THRESHOLD )
|
||||
{
|
||||
elog ("Failed socket initialization (rc:%d) max retries ; exiting ...\n", rc );
|
||||
elog ("Failed socket initialization (rc:%d) "
|
||||
"max retries ; exiting ...", rc );
|
||||
daemon_exit ();
|
||||
}
|
||||
else
|
||||
{
|
||||
elog ("Failed socket initialization (rc:%d) ; will retry in 5 secs ...\n", rc );
|
||||
sleep (5);
|
||||
elog ("Failed socket initialization (rc:%d) ; "
|
||||
"will retry in %d secs ...\n",
|
||||
rc, HBS_SOCKET_INIT_RETRY_INTERVAL);
|
||||
sleep (HBS_SOCKET_INIT_RETRY_INTERVAL);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1584,6 +1662,8 @@ void daemon_service_run ( void )
|
||||
rc = send_event ( hbsInv.my_hostname, MTC_EVENT_HEARTBEAT_READY, MGMNT_IFACE ) ;
|
||||
if ( rc == RETRY )
|
||||
{
|
||||
// TODO: Threshold this loop and exit or this
|
||||
// could be a silent process failure loop.
|
||||
mtcWait_secs ( 3 );
|
||||
}
|
||||
} while ( rc == RETRY ) ;
|
||||
@ -1593,29 +1673,6 @@ void daemon_service_run ( void )
|
||||
daemon_exit ();
|
||||
}
|
||||
|
||||
if ( get_link_state ( hbs_sock.ioctl_sock, hbs_config.mgmnt_iface, &hbsInv.mgmnt_link_up_and_running ) )
|
||||
{
|
||||
hbsInv.mgmnt_link_up_and_running = false ;
|
||||
wlog ("Failed to query %s operational state ; defaulting to down\n", hbs_config.mgmnt_iface );
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog ("Mgmnt %s link is %s\n", hbs_config.mgmnt_iface, hbsInv.mgmnt_link_up_and_running ? "Up" : "Down" );
|
||||
}
|
||||
|
||||
if ( hbsInv.clstr_network_provisioned == true )
|
||||
{
|
||||
if ( get_link_state ( hbs_sock.ioctl_sock, hbs_config.clstr_iface, &hbsInv.clstr_link_up_and_running ) )
|
||||
{
|
||||
hbsInv.clstr_link_up_and_running = false ;
|
||||
wlog ("Failed to query %s operational state ; defaulting to down\n", hbs_config.clstr_iface );
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog ("Cluster-host %s link is %s\n", hbs_config.clstr_iface, hbsInv.clstr_link_up_and_running ? "Up" : "Down" );
|
||||
}
|
||||
}
|
||||
|
||||
/* Make the main loop schedule in real-time */
|
||||
{
|
||||
struct sched_param param ;
|
||||
@ -1720,7 +1777,8 @@ void daemon_service_run ( void )
|
||||
counter = 1 ;
|
||||
}
|
||||
}
|
||||
else if ( hbsInv.hbs_disabled == true )
|
||||
else if (( hbsInv.hbs_disabled == true ) &&
|
||||
( hbsInv.mgmnt_link_up_and_running == true ))
|
||||
{
|
||||
hbs_ctrl.locked = false ;
|
||||
hbsInv.hbs_disabled = false;
|
||||
@ -2191,12 +2249,11 @@ void daemon_service_run ( void )
|
||||
if ( rc != 0 )
|
||||
{
|
||||
/* TODO: Fix this with an alarm */
|
||||
wlog_throttled ( pulse_request_fail_log_counter[iface], 100,
|
||||
"%s hbs_pulse_request failed - rc:%d\n", get_iface_name_str(iface), rc);
|
||||
|
||||
if ( pulse_request_fail_log_counter[iface] == INTERFACE_ERRORS_FOR_REINIT )
|
||||
if ( pulse_request_fail_log_counter[iface] > INTERFACE_ERRORS_FOR_REINIT )
|
||||
{
|
||||
_setup_pulse_messaging ( (iface_enum)iface , daemon_get_rmem_max ()) ;
|
||||
rc = _setup_pulse_messaging ( (iface_enum)iface , daemon_get_rmem_max ()) ;
|
||||
if ( rc )
|
||||
continue ;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
|
||||
* Copyright (c) 2013-2020 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
@ -437,8 +437,11 @@ int daemon_configure ( void )
|
||||
{
|
||||
if (strcmp(hbs_config.clstr_iface, hbs_config.mgmnt_iface))
|
||||
{
|
||||
clstr_network_provisioned = true ;
|
||||
ilog ("Cluster-host Name : %s\n", hbs_config.clstr_iface );
|
||||
if (strcmp(hbs_config.clstr_iface, LOOPBACK_IF))
|
||||
{
|
||||
clstr_network_provisioned = true ;
|
||||
ilog ("Cluster-host Name : %s\n", hbs_config.clstr_iface );
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( clstr_network_provisioned == true )
|
||||
@ -476,44 +479,80 @@ int _setup_pulse_messaging ( iface_enum i, int rmem )
|
||||
/* client sockets are not modified */
|
||||
UNUSED(rmem);
|
||||
|
||||
/* Load up the interface name */
|
||||
if ( i == MGMNT_IFACE )
|
||||
{
|
||||
iface = hbs_config.mgmnt_iface ;
|
||||
}
|
||||
else if (( i == CLSTR_IFACE ) && ( hbs_config.clstr_iface != NULL ))
|
||||
{
|
||||
iface = hbs_config.clstr_iface ;
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("No Cluster-host Interface\n");
|
||||
return (RETRY);
|
||||
}
|
||||
|
||||
_close_pulse_rx_sock (i);
|
||||
_close_pulse_tx_sock (i);
|
||||
|
||||
/********************************************************************/
|
||||
/* Setup multicast Pulse Request Receive Socket */
|
||||
/********************************************************************/
|
||||
|
||||
hbs_sock.rx_sock[i] =
|
||||
new msgClassRx(hbs_config.multicast,hbs_sock.rx_port[i],IPPROTO_UDP,iface,true,true);
|
||||
if (hbs_sock.rx_sock[i]->return_status != PASS)
|
||||
/* Load up the interface name */
|
||||
if ( i == MGMNT_IFACE )
|
||||
{
|
||||
elog("Cannot create socket (%d) (%d:%m)\n", i, errno );
|
||||
_close_pulse_rx_sock (i);
|
||||
iface = hbs_config.mgmnt_iface ;
|
||||
if (strcmp(iface, LOOPBACK_IF))
|
||||
{
|
||||
hbs_sock.rx_sock[i] =
|
||||
new msgClassRx(hbs_config.multicast,hbs_sock.rx_port[i],IPPROTO_UDP,iface,true,true);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Default to unicast heartbeat on management 'lo' interface
|
||||
hbs_sock.rx_sock[i] =
|
||||
new msgClassRx(my_address.data(),hbs_sock.rx_port[i],IPPROTO_UDP,iface,false, false);
|
||||
}
|
||||
|
||||
}
|
||||
else if (( i == CLSTR_IFACE ) &&
|
||||
( clstr_network_provisioned == true ) &&
|
||||
( hbs_config.clstr_iface != NULL ))
|
||||
{
|
||||
iface = hbs_config.clstr_iface ;
|
||||
hbs_sock.rx_sock[i] =
|
||||
new msgClassRx(hbs_config.multicast,hbs_sock.rx_port[i],IPPROTO_UDP,iface,true,true);
|
||||
}
|
||||
else
|
||||
{
|
||||
ilog("Cluster host interface not used.");
|
||||
return (PASS);
|
||||
}
|
||||
|
||||
if ( hbs_sock.rx_sock[i] )
|
||||
{
|
||||
if (hbs_sock.rx_sock[i]->return_status != PASS)
|
||||
{
|
||||
elog("Failed to create %s pulse receiver socket (%d:%d:%m)\n",
|
||||
get_iface_name_str(i),
|
||||
hbs_sock.rx_sock[i]->return_status,
|
||||
errno );
|
||||
_close_pulse_rx_sock (i);
|
||||
return (FAIL_SOCKET_CREATE);
|
||||
}
|
||||
hbs_sock.rx_sock[i]->sock_ok(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
elog("Failed to create %s pulse receiver socket (%d:%m)\n",
|
||||
get_iface_name_str(i), errno );
|
||||
return (FAIL_SOCKET_CREATE);
|
||||
}
|
||||
hbs_sock.rx_sock[i]->sock_ok(true);
|
||||
|
||||
/********************************************************************/
|
||||
/* Setup unicast transmit (reply) socket */
|
||||
/********************************************************************/
|
||||
hbs_sock.tx_sock[i] =
|
||||
new msgClassTx(hbs_config.multicast,hbs_sock.tx_port[i],IPPROTO_UDP, iface);
|
||||
if ( hbs_sock.tx_sock[i] == NULL )
|
||||
{
|
||||
elog("Failed to create %s pulse reply socket (%d:%m)\n",
|
||||
get_iface_name_str(i), errno );
|
||||
return (FAIL_SOCKET_CREATE);
|
||||
}
|
||||
if (hbs_sock.tx_sock[i]->return_status != PASS)
|
||||
{
|
||||
elog("Cannot create unicast transmit socket (%d) (%d:%m)\n", i, errno );
|
||||
elog("Failed to create %s pulse reply socket (%d:%d:%m)\n",
|
||||
get_iface_name_str(i),
|
||||
hbs_sock.tx_sock[i]->return_status,
|
||||
errno );
|
||||
_close_pulse_tx_sock(i);
|
||||
return (FAIL_SOCKET_CREATE);
|
||||
}
|
||||
@ -1234,7 +1273,7 @@ int daemon_init ( string iface, string nodeType_str )
|
||||
}
|
||||
|
||||
/* Setup the heartbeat service messaging sockets */
|
||||
else if ( hbs_socket_init () != PASS )
|
||||
else if (( rc = hbs_socket_init ()) != PASS )
|
||||
{
|
||||
elog ("socket initialization failed (rc:%d)\n", rc );
|
||||
rc = FAIL_SOCKET_INIT;
|
||||
|
Loading…
Reference in New Issue
Block a user