Add 50 byte hostname support to maintenance

Hosts with hostnames longer than 31 characters do not
go online (locked-disabled-online) after installation.

This update enhances maintenance messaging to support
up to 50 byte/character hostnames.

System Install:
---------------
PASS: Verify system install
PASS: Verify AIO system install (regression)
PASS: Verify system install with long hostnames, deployment-config.yaml
PASS: Verify mtcAgent process startup/restart logs
PASS: Verify hbsAgent process startup/restart logs (active controller)
PASS: Verify hbsAgent process startup/restart logs (standby controller)
PASS: Verify hwmond process startup/restart logs
PASS: Verify guestAgent process startup/restart logs
PASS: Verify all common maintenance daemons startup/restart logs
PASS: Verify patch applies and removes cleanly

PASS: Verify long hostname Add ; inventory distribution
PASS: Verify short hostname Add ; inventory distribution

Long Hostname Handling:
-----------------------
PASS: Verify host name support for up to 50 and 51 byte hostnames

Heartbeat Monitoring:
---------------------
PASS: Verify cluster-host interface link down handling.
PASS: Verify graceful recovery from host reboot.
PASS: Verify pmond process failure and recovery cycle.

Maintenance Actions:
--------------------
PASS: Verify host install with 50 byte hostname
PASS: Verify host lock
PASS: Verify host unlock
PASS: Verify host reboot
PASS: Verify host reinstall
PASS: Verify host delete (no core dump / all daemon logs)
PASS: Verify host power-off
PASS: Verify host power-on
PASS: Verify BMC State Info
PASS: Verify lock and unlock storage node
PASS: Controller Swact over and Back
PASS: Verify thresholded heartbeat failure handling
PASS: Verify node locked flag file
PASS: Verify no core dumps during testing

Hardware Monitor:
-----------------
PASS: Verify BMC Provisioning/Reprovisioning/Deprovisioning
PASS: Verify Inventory Add/Delete/Modify
PASS: Verify Sensor Model and Monitoring
PASS: Verify Sensor Model Relearn
PASS: Verify Alarming and Logs
PASS: Verify Sensor Action, Interval modification
PASS: Verify Critical Sensor Action handling (ignore, log, alarm, reset, power cycle)

Guest Agent:
------------
PASS: Verify inventory add and delete

Process Monitor:
----------------
PASS: Verify process monitor logs
PASS: Verify process monitor events into mtcAgent
PASS: Verify process monitor failure alarming and recovery clear.
PASS: Verify process monitor regression script (test-pmon.sh -c restart)
PASS: Verify process monitor regression script (test-pmon.sh -c kill)
PASS: Verify process monitor regression script (test-pmon-action.sh)
PASS: Verify critical process failure handling
PASS: Verify major process failure handling

Collectd Monitoring:
--------------------
PASS: Verify collectd monitoring for long hostname hosts

Regression:
-----------
PASS: Verify mtce daemon signal handling (test-signals.sh)

Change-Id: If22ab081397ec1e8b24f20aad8c99f8079cb98a5
Closes-Bug: 1824429
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2019-06-18 08:33:28 -04:00
parent c3644a54e3
commit a4238c2a35
19 changed files with 915 additions and 734 deletions

View File

@ -309,17 +309,17 @@ int msgClassAddr::getAddressFromInterface(const char* interface, char* address,
return rc; return rc;
} }
} }
char hostname[MAX_HOST_NAME_SIZE+1] = {0}; char hostname[MAX_CHARS_HOSTNAME] = {0};
if (gethostname(hostname, if (gethostname(hostname, MAX_CHARS_HOSTNAME) < 0)
MAX_HOST_NAME_SIZE) < 0) { {
elog("Failed to get system host name (err: %d)", errno); elog("Failed to gethostname (%d:%m)", errno);
return rc; return rc;
} }
// if hostname is localhost then resolution will give us // if hostname is localhost then resolution will give us
// the interface loopback address. Detect this case and // the interface loopback address. Detect this case and
// return. // return.
if (!strncmp(hostname, "localhost", 9)) { if (!strcmp(hostname, LOCALHOST)) {
wlog ("Detected localhost as system hostname." wlog ("Detected localhost as system hostname."
" Cannot resolve IP address"); " Cannot resolve IP address");
return rc; return rc;
@ -328,19 +328,17 @@ int msgClassAddr::getAddressFromInterface(const char* interface, char* address,
// if it is cluster-host then we need to determine the interface // if it is cluster-host then we need to determine the interface
// host name. For management interface, the system hostname // host name. For management interface, the system hostname
// is the intf hostname // is the intf hostname
const char* cluster_host_suffix = "-cluster-host"; char iface_hostname[MAX_CHARS_HOSTNAME];
size_t cluster_host_suffix_len = sizeof(cluster_host_suffix);
char iface_hostname[MAX_HOST_NAME_SIZE+cluster_host_suffix_len];
memset(iface_hostname, 0, sizeof(iface_hostname)); memset(iface_hostname, 0, sizeof(iface_hostname));
snprintf(iface_hostname, sizeof(iface_hostname), snprintf(iface_hostname, sizeof(iface_hostname),
"%s%s", hostname, "%s%s", hostname,
(((interface_type == CLSTR_IFACE)) ? cluster_host_suffix : "")); (((interface_type == CLSTR_IFACE)) ? CLUSTER_HOST_SUFFIX : ""));
struct addrinfo *res = NULL; struct addrinfo *res = NULL;
int ret = getaddrinfo(iface_hostname, NULL, NULL, &res); int ret = getaddrinfo(iface_hostname, NULL, NULL, &res);
if(ret) if(ret)
{ {
elog("IP address resolution failed for %s (err: %s)", elog("%s ip address resolution failed (err: %s)",
iface_hostname, gai_strerror(ret)); iface_hostname, gai_strerror(ret));
return rc; return rc;
} }

View File

@ -165,8 +165,8 @@ int get_netlink_events ( int nl_socket , std::list<string> & links_gone_down,
void log_link_events ( int netlink_sock, void log_link_events ( int netlink_sock,
int ioctl_sock, int ioctl_sock,
const char * mgmnt_iface_ptr, const char * mgmnt_iface_ptr,
const char * clstr_iface_ptr, const char * clstr_iface_ptr,
bool & mgmnt_link_up_and_running, bool & mgmnt_link_up_and_running,
bool & clstr_link_up_and_running) bool & clstr_link_up_and_running)
@ -175,28 +175,28 @@ void log_link_events ( int netlink_sock,
std::list<string> links_gone_up ; std::list<string> links_gone_up ;
std::list<string>::iterator iter_curr_ptr ; std::list<string>::iterator iter_curr_ptr ;
dlog3 ("logging for interfaces %s and %s\n", mgmnt_iface_ptr, clstr_iface_ptr); dlog3 ("logging for interfaces %s and %s\n", mgmnt_iface_ptr, clstr_iface_ptr);
if ( get_netlink_events ( netlink_sock, links_gone_down, links_gone_up )) if ( get_netlink_events ( netlink_sock, links_gone_down, links_gone_up ))
{ {
bool running = false ; bool running = false ;
if ( !links_gone_down.empty() ) if ( !links_gone_down.empty() )
{ {
dlog3 ("%ld links have dropped\n", links_gone_down.size() ); dlog3 ("%ld links have dropped\n", links_gone_down.size() );
/* Look at the down list */ /* Look at the down list */
for ( iter_curr_ptr = links_gone_down.begin(); for ( iter_curr_ptr = links_gone_down.begin();
iter_curr_ptr != links_gone_down.end() ; iter_curr_ptr != links_gone_down.end() ;
iter_curr_ptr++ ) iter_curr_ptr++ )
{ {
dlog3 ( "downed link: %s (running:%d:%d)\n", dlog3 ( "downed link: %s (running:%d:%d)\n",
iter_curr_ptr->c_str(), iter_curr_ptr->c_str(),
mgmnt_link_up_and_running, mgmnt_link_up_and_running,
clstr_link_up_and_running ); clstr_link_up_and_running );
if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data())) if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data()))
{ {
if ( mgmnt_link_up_and_running == true ) if ( mgmnt_link_up_and_running == true )
{ {
mgmnt_link_up_and_running = false ; mgmnt_link_up_and_running = false ;
wlog ("Mgmnt link %s is down\n", mgmnt_iface_ptr ); wlog ("Mgmnt link %s is down\n", mgmnt_iface_ptr );
} }
} }
@ -208,14 +208,19 @@ void log_link_events ( int netlink_sock,
wlog ("Cluster-host link %s is down\n", clstr_iface_ptr ); wlog ("Cluster-host link %s is down\n", clstr_iface_ptr );
} }
} }
if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS ) if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS )
{ {
dlog ("%s is down (oper:%s)\n", iter_curr_ptr->c_str(), running ? "up" : "down" ); wlog ("%s is down (oper:%s) (%ld)\n",
iter_curr_ptr->c_str(),
running ? "up" : "down",
iter_curr_ptr->length() );
} }
else else
{ {
wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); wlog ("%s is down (driver query failed) (len:%ld)\n",
iter_curr_ptr->c_str(),
iter_curr_ptr->length() );
} }
} }
} }
@ -228,14 +233,14 @@ void log_link_events ( int netlink_sock,
iter_curr_ptr != links_gone_up.end() ; iter_curr_ptr != links_gone_up.end() ;
iter_curr_ptr++ ) iter_curr_ptr++ )
{ {
dlog3 ( "recovered link: %s (running:%d:%d)\n", dlog3 ( "recovered link: %s (running:%d:%d)\n",
iter_curr_ptr->c_str(), iter_curr_ptr->c_str(),
mgmnt_link_up_and_running, mgmnt_link_up_and_running,
clstr_link_up_and_running ); clstr_link_up_and_running );
if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data())) if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data()))
{ {
mgmnt_link_up_and_running = true ; mgmnt_link_up_and_running = true ;
wlog ("Mgmnt link %s is up\n", mgmnt_iface_ptr ); wlog ("Mgmnt link %s is up\n", mgmnt_iface_ptr );
} }
if ( !strcmp (clstr_iface_ptr, iter_curr_ptr->data())) if ( !strcmp (clstr_iface_ptr, iter_curr_ptr->data()))
@ -246,13 +251,16 @@ void log_link_events ( int netlink_sock,
if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS ) if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS )
{ {
dlog ("%s is up (oper:%s)\n", wlog ("%s is up (oper:%s) (len:%ld)\n",
iter_curr_ptr->c_str(), iter_curr_ptr->c_str(),
running ? "up" : "down" ); running ? "up" : "down",
iter_curr_ptr->length() );
} }
else else
{ {
wlog ("%s is up (driver query failed)\n", iter_curr_ptr->c_str() ); wlog ("%s is up (driver query failed) (len:%ld)\n",
iter_curr_ptr->c_str(),
iter_curr_ptr->length() );
} }
} }
} }
@ -291,9 +299,9 @@ int open_netlink_socket ( int groups )
addr.nl_pid = getpid (); addr.nl_pid = getpid ();
/* addr.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR; */ /* addr.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR; */
addr.nl_groups = groups ; /* allow the caller to specify the groups */ addr.nl_groups = groups ; /* allow the caller to specify the groups */
if (bind (nl_socket, (struct sockaddr *) &addr, sizeof (addr)) < 0) if (bind (nl_socket, (struct sockaddr *) &addr, sizeof (addr)) < 0)
{ {
elog ( "Failed to bind netlink socket (%d:%s)\n", errno, strerror(errno)); elog ( "Failed to bind netlink socket (%d:%s)\n", errno, strerror(errno));
close (nl_socket); close (nl_socket);
nl_socket = 0 ; nl_socket = 0 ;

View File

@ -194,7 +194,7 @@ const char * get_mtcNodeCommand_str ( int cmd )
case MTC_EVENT_HOST_STALLED: return("host stalled event"); case MTC_EVENT_HOST_STALLED: return("host stalled event");
/* pmon events */ /* pmon events */
case MTC_EVENT_PMON_CLEAR: return("pmon clear"); case MTC_EVENT_PMON_CLEAR: return("pmon degrade clear");
case MTC_EVENT_PMON_CRIT: return("pmon critical event"); case MTC_EVENT_PMON_CRIT: return("pmon critical event");
case MTC_EVENT_PMON_MAJOR: return("pmon major event"); case MTC_EVENT_PMON_MAJOR: return("pmon major event");
case MTC_EVENT_PMON_MINOR: return("pmon minor event"); case MTC_EVENT_PMON_MINOR: return("pmon minor event");

View File

@ -163,6 +163,9 @@ void daemon_exit ( void );
#define LOOPBACK_IPV6 "::1" #define LOOPBACK_IPV6 "::1"
#define LOCALHOST "localhost" #define LOCALHOST "localhost"
#define CLUSTER_HOST_SUFFIX ((const char*)("-cluster-host"))
#define NONE (const char *)"none" #define NONE (const char *)"none"
/** Largest heartbeat pulse (req/resp) message size */ /** Largest heartbeat pulse (req/resp) message size */
@ -198,8 +201,10 @@ void daemon_exit ( void );
#define MTC_JSON_INV_BMIP "bm_ip" #define MTC_JSON_INV_BMIP "bm_ip"
#define MTC_JSON_INV_BMTYPE "bm_type" #define MTC_JSON_INV_BMTYPE "bm_type"
#define MTC_JSON_INV_BMUN "bm_username" #define MTC_JSON_INV_BMUN "bm_username"
#define MTC_JSON_SERVICE "service"
#define MTC_JSON_SEVERITY "severity" #define MTC_JSON_SEVERITY "severity"
#define MTC_JSON_SENSOR "sensor"
#define MTC_JSON_PROCESS "process"
/* These Task strings should not be changed without /* These Task strings should not be changed without
* the corresponding change in Horizon. * the corresponding change in Horizon.
@ -336,9 +341,9 @@ void daemon_exit ( void );
/* root@controller-0:~# getconf HOST_NAME_MAX /* root@controller-0:~# getconf HOST_NAME_MAX
* 64 * 64
*/ */
#define MAX_CHARS_HOSTNAME (32) /**< The largest hostname length */ #define MAX_CHARS_HOSTNAME_32 (32)
// #define MAX_CHARS_HOSTNAME (64) /**< The largest hostname length */ #define MAX_CHARS_HOSTNAME (256) /**< the largest hostname length */
#define MAX_CHARS_FILENAME (256) /**< The largest hostname length */ #define MAX_CHARS_FILENAME (256) /**< the largest filename length */
#define MAX_CHARS_ON_LINE (256) /**> max number of chars on a single line */ #define MAX_CHARS_ON_LINE (256) /**> max number of chars on a single line */
#define MAX_CHARS_IN_INT (65) /**> max number of chars in an integer */ #define MAX_CHARS_IN_INT (65) /**> max number of chars in an integer */
@ -389,6 +394,26 @@ void daemon_exit ( void );
/* This label will resolve to an IP on the management network */ /* This label will resolve to an IP on the management network */
#define CONTROLLER_NFS ((const char *)"controller-nfs") #define CONTROLLER_NFS ((const char *)"controller-nfs")
/* Maintenance Daemon Services - actual names of the daemons */
/* ... controller only service / daemons */
#define MTC_SERVICE_MTCAGENT_NAME "mtcAgent"
#define MTC_SERVICE_HBSAGENT_NAME "hbsAgent"
#define MTC_SERVICE_HWMOND_NAME "hwmond"
#define MTC_SERVICE_GUESTAGENT_NAME "guestAgent"
/* ... all nodes services / daemons */
#define MTC_SERVICE_PMOND_NAME "pmond"
#define MTC_SERVICE_HBSCLIENT_NAME "hbsClient"
#define MTC_SERVICE_MTCCLIENT_NAME "mtcClient"
#define MTC_SERVICE_HOSTW_NAME "hostwd"
#define MTC_SERVICE_FSMON_NAME "fsmond"
#define MTC_SERVICE_LMON_NAME "lmond"
#define MTC_SERVICE_MTCLOG_NAME "mtclogd"
/* ... compute only services / daemons */
#define MTC_SERVICE_GUESTSERVER_NAME "guestServer"
#define CGTS_NODE_TYPES 4 #define CGTS_NODE_TYPES 4
#define CGTS_NODE_TYPE_SIZE 12 #define CGTS_NODE_TYPE_SIZE 12
#define CGTS_NODE_NULL (0x00) #define CGTS_NODE_NULL (0x00)
@ -499,6 +524,7 @@ const char * get_heartbeat_ready_header( void ) ;
#define MTC_CMD_REVISION (0) #define MTC_CMD_REVISION (0)
#define MTC_CMD_FEATURE_VER__MACADDR_IN_CMD (1) #define MTC_CMD_FEATURE_VER__MACADDR_IN_CMD (1)
#define MTC_CMD_FEATURE_VER__KEYVALUE_IN_BUF (2)
typedef struct typedef struct
{ {

View File

@ -110,6 +110,7 @@ void node_inv_init (node_inv_type & inv)
inv.uuid.clear(); inv.uuid.clear();
inv.name.clear(); inv.name.clear();
inv.ip.clear(); inv.ip.clear();
inv.clstr_ip.clear();
inv.mac.clear(); inv.mac.clear();
inv.admin.clear(); inv.admin.clear();
inv.oper.clear(); inv.oper.clear();
@ -750,7 +751,7 @@ int get_iface_address ( const char * iface_ptr, string & ip_addr , bool retry )
if ( rc == PASS ) if ( rc == PASS )
{ {
ip_addr = ip_cstr; ip_addr = ip_cstr;
ilog ("IP Address : %s\n", ip_addr.c_str() ); dlog ("IP Address : %s\n", ip_addr.c_str() );
} }
else else
{ {

View File

@ -281,6 +281,7 @@ nodeLinkClass::nodeLinkClass()
my_hostname.clear() ; my_hostname.clear() ;
my_local_ip.clear() ; my_local_ip.clear() ;
my_float_ip.clear() ; my_float_ip.clear() ;
my_clstr_ip.clear() ;
active_controller_hostname.clear() ; active_controller_hostname.clear() ;
inactive_controller_hostname.clear() ; inactive_controller_hostname.clear() ;
@ -2351,15 +2352,15 @@ int nodeLinkClass::mod_host ( node_inv_type & inv )
} }
if ( node_ptr->clstr_ip.compare ( inv.clstr_ip ) ) if ( node_ptr->clstr_ip.compare ( inv.clstr_ip ) )
{ {
if (( hostUtil_is_valid_ip_addr ( inv.clstr_ip )) || ( hostUtil_is_valid_ip_addr ( node_ptr->clstr_ip ))) if ( hostUtil_is_valid_ip_addr ( inv.clstr_ip ))
{ {
plog ("%s Modify 'clstr_ip' from %s -> %s\n", plog ("%s Modify 'clstr_ip' from %s -> %s\n",
node_ptr->hostname.c_str(), node_ptr->hostname.c_str(),
node_ptr->clstr_ip.c_str(), inv.clstr_ip.c_str() ); node_ptr->clstr_ip.c_str(), inv.clstr_ip.c_str() );
modify = true ; /* we have a delta */ modify = true ; /* we have a delta */
node_ptr->clstr_ip = inv.clstr_ip ;
} }
node_ptr->clstr_ip = inv.clstr_ip ;
} }
if ( (!inv.name.empty()) && (node_ptr->hostname.compare ( inv.name)) ) if ( (!inv.name.empty()) && (node_ptr->hostname.compare ( inv.name)) )
{ {
@ -2976,10 +2977,13 @@ int nodeLinkClass::add_heartbeat_host ( const node_inv_type & inv )
/* Handle the case where we are adding a node that is already */ /* Handle the case where we are adding a node that is already */
/* present if so just update the inventory data not the mtc state */ /* present if so just update the inventory data not the mtc state */
node_ptr = nodeLinkClass::getNode(inv.name); node_ptr = nodeLinkClass::getNode(inv.name);
if ( node_ptr ) if ( node_ptr )
{ {
dlog ("%s already provisioned\n", node_ptr->hostname.c_str()); dlog ("%s already provisioned\n", node_ptr->hostname.c_str());
rc = RETRY ; node_ptr->nodetype = inv.nodetype ;
node_ptr->ip = inv.ip ;
node_ptr->clstr_ip = inv.clstr_ip ;
rc = PASS ;
} }
/* Otherwise add it as a new node */ /* Otherwise add it as a new node */
else else
@ -2989,6 +2993,8 @@ int nodeLinkClass::add_heartbeat_host ( const node_inv_type & inv )
{ {
node_ptr->hostname = inv.name ; node_ptr->hostname = inv.name ;
node_ptr->nodetype = inv.nodetype ; node_ptr->nodetype = inv.nodetype ;
node_ptr->ip = inv.ip ;
node_ptr->clstr_ip = inv.clstr_ip ;
dlog ("%s added to linked list\n", inv.name.c_str()); dlog ("%s added to linked list\n", inv.name.c_str());
rc = PASS ; rc = PASS ;
} }
@ -3323,21 +3329,32 @@ int nodeLinkClass::set_clstr_hostaddr ( string & hostname, string & ip )
node_ptr = nodeLinkClass::getNode ( hostname ); node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL ) if ( node_ptr != NULL )
{ {
node_ptr->clstr_ip = ip ; if (( hostUtil_is_valid_ip_addr(ip)) && ( node_ptr->clstr_ip != ip ))
{
ilog ("%s cluster address provision change from %s to %s",
hostname.c_str(),
node_ptr->clstr_ip.empty() ? "none" : node_ptr->clstr_ip.c_str(),
ip.c_str());
node_ptr->clstr_ip = ip ;
send_hbs_command ( node_ptr->hostname, MTC_CMD_MOD_HOST );
}
rc = PASS ; rc = PASS ;
} }
return ( rc ); return ( rc );
} }
string nodeLinkClass::get_hostname ( string & hostaddr ) string nodeLinkClass::get_hostname ( string hostaddr )
{ {
if (( hostaddr == LOOPBACK_IPV6 ) || if (( hostaddr == LOOPBACK_IPV6 ) ||
( hostaddr == LOOPBACK_IP ) || ( hostaddr == LOOPBACK_IP ) ||
( hostaddr == LOCALHOST )) ( hostaddr == LOCALHOST ) ||
( hostaddr == my_local_ip ) ||
( hostaddr == my_float_ip ) ||
( hostaddr == my_clstr_ip ))
{ {
return(my_hostname); return(this->my_hostname);
} }
else else if ( this->hosts )
{ {
nodeLinkClass::node* node_ptr ; nodeLinkClass::node* node_ptr ;
node_ptr = nodeLinkClass::getNode ( hostaddr ); node_ptr = nodeLinkClass::getNode ( hostaddr );
@ -3345,8 +3362,8 @@ string nodeLinkClass::get_hostname ( string & hostaddr )
{ {
return ( node_ptr->hostname ); return ( node_ptr->hostname );
} }
return ( null_str );
} }
return ( null_str );
} }
string nodeLinkClass::get_hostname_from_bm_ip ( string bm_ip ) string nodeLinkClass::get_hostname_from_bm_ip ( string bm_ip )
@ -4763,7 +4780,9 @@ int nodeLinkClass::declare_service_ready ( string & hostname,
else if ( service == MTC_SERVICE_PMOND ) else if ( service == MTC_SERVICE_PMOND )
{ {
node_ptr->pmond_ready = true ; node_ptr->pmond_ready = true ;
plog ("%s got pmond ready event\n", hostname.c_str()); plog ("%s %s ready event\n",
hostname.c_str(),
MTC_SERVICE_PMOND_NAME);
/* A ready event means that pmond pocess has started. /* A ready event means that pmond pocess has started.
* Any previous history is gone. Cleanup mtce. * Any previous history is gone. Cleanup mtce.
@ -4775,7 +4794,9 @@ int nodeLinkClass::declare_service_ready ( string & hostname,
else if ( service == MTC_SERVICE_HWMOND ) else if ( service == MTC_SERVICE_HWMOND )
{ {
node_ptr->hwmond_ready = true ; node_ptr->hwmond_ready = true ;
plog ("%s got hwmond ready event\n", hostname.c_str()); plog ("%s %s ready event\n",
hostname.c_str(),
MTC_SERVICE_HWMOND_NAME);
if ( node_ptr->bm_provisioned == true ) if ( node_ptr->bm_provisioned == true )
{ {
send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST ); send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST );
@ -4788,7 +4809,9 @@ int nodeLinkClass::declare_service_ready ( string & hostname,
if ( node_ptr->hbsClient_ready == false ) if ( node_ptr->hbsClient_ready == false )
{ {
node_ptr->hbsClient_ready = true ; node_ptr->hbsClient_ready = true ;
plog ("%s got hbsClient ready event\n", hostname.c_str()); plog ("%s %s ready event\n",
hostname.c_str(),
MTC_SERVICE_HBSCLIENT_NAME);
} }
return (PASS); return (PASS);
} }
@ -4901,7 +4924,7 @@ int nodeLinkClass::node_degrade_control ( string & hostname, int state, string s
slog ("%s service not specified", hostname.c_str()); slog ("%s service not specified", hostname.c_str());
return (FAIL_STRING_EMPTY); return (FAIL_STRING_EMPTY);
} }
else if ( !service.compare("hwmon") ) else if ( service == MTC_SERVICE_HWMOND_NAME )
{ {
service_flag = DEGRADE_MASK_HWMON ; service_flag = DEGRADE_MASK_HWMON ;
} }
@ -5048,6 +5071,9 @@ int nodeLinkClass::invoke_hwmon_action ( string & hostname, int action, string
mtcTimer_reset ( node_ptr->hwmon_reset.recovery_timer ); mtcTimer_reset ( node_ptr->hwmon_reset.recovery_timer );
mtcTimer_start ( node_ptr->hwmon_reset.recovery_timer, mtcTimer_handler, MTC_MINS_15 ); mtcTimer_start ( node_ptr->hwmon_reset.recovery_timer, mtcTimer_handler, MTC_MINS_15 );
wlog ("%s invoking 'reset' due to critical '%s' sensor assertion\n",
hostname.c_str(), sensor.c_str());
force_full_enable ( node_ptr ); force_full_enable ( node_ptr );
} }
else else
@ -7394,7 +7420,7 @@ bool nodeLinkClass::get_hwmond_monitor_state ( string & hostname )
{ {
bool state = false ; bool state = false ;
if ( hostname.length() ) if ( hostname.length() )
{ {
struct nodeLinkClass::node* node_ptr ; struct nodeLinkClass::node* node_ptr ;
node_ptr = nodeLinkClass::getNode ( hostname ); node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL ) if ( node_ptr != NULL )
@ -7419,9 +7445,14 @@ bool nodeLinkClass::get_hbs_monitor_state ( string & hostname, int iface )
state = node_ptr->monitor[iface] ; state = node_ptr->monitor[iface] ;
if ( state == true ) if ( state == true )
{ {
wlog_throttled (node_ptr->no_rri_log_throttle, rri_max, /* fast lookup not supported for hostnames longer than 31
"%s Not Offering RRI (%d)\n", * chars so in those cases don't do Not Offering RRI log */
hostname.c_str(), this->hosts ); if ( hostname.length() < MAX_CHARS_HOSTNAME_32 )
{
wlog_throttled (node_ptr->no_rri_log_throttle, rri_max,
"%s Not Offering RRI (hosts:%d)\n",
hostname.c_str(), this->hosts );
}
} }
else else
{ {
@ -7450,7 +7481,7 @@ void nodeLinkClass::manage_pulse_flags ( string & hostname, unsigned int flags )
/* Manage the heartbeat pulse flags by pulse_ptr */ /* Manage the heartbeat pulse flags by pulse_ptr */
void nodeLinkClass::manage_pulse_flags ( struct nodeLinkClass::node * node_ptr, unsigned int flags ) void nodeLinkClass::manage_pulse_flags ( struct nodeLinkClass::node * node_ptr, unsigned int flags )
{ {
/* Do nothing with the flags for missing pulse /* Do nothing with the flags for missing pulse
* responses (identified with flags=NULL_PULSE_FLAGS) */ * responses (identified with flags=NULL_PULSE_FLAGS) */
if ( flags == NULL_PULSE_FLAGS ) if ( flags == NULL_PULSE_FLAGS )
{ {
@ -7473,9 +7504,9 @@ void nodeLinkClass::manage_pulse_flags ( struct nodeLinkClass::node * node_ptr,
/* TODO: Does this need to be debounced ??? */ /* TODO: Does this need to be debounced ??? */
node_ptr->monitor[CLSTR_IFACE] = true ; node_ptr->monitor[CLSTR_IFACE] = true ;
} }
/* A host indicates that its process monitor is running by setting the /* A host indicates that its process monitor is running by setting the
* PMOND_FLAG occasionally in its pulse response. * PMOND_FLAG occasionally in its pulse response.
* The following if/else if clauses manage raising an alarm and degrading * The following if/else if clauses manage raising an alarm and degrading
* a host has stopped sending the PMOND_FLAG. */ * a host has stopped sending the PMOND_FLAG. */
if ( flags & PMOND_FLAG ) if ( flags & PMOND_FLAG )
@ -7707,8 +7738,8 @@ struct nodeLinkClass::node* nodeLinkClass::getPulseNode ( string & hostname , if
/* Find the node in the list of nodes being heartbeated and splice it out */ /* Find the node in the list of nodes being heartbeated and splice it out */
int nodeLinkClass::remPulse_by_index ( string hostname, int index, iface_enum iface, bool clear_b2b_misses_count, unsigned int flags ) int nodeLinkClass::remPulse_by_index ( string hostname, int index, iface_enum iface, bool clear_b2b_misses_count, unsigned int flags )
{ {
int rc = FAIL ; int rc = RETRY ;
if (( index > 0 ) && ( !(index > hosts))) if (!(index > hosts))
{ {
if ( hbs_rra[index] != NULL ) if ( hbs_rra[index] != NULL )
{ {
@ -7727,14 +7758,20 @@ int nodeLinkClass::remPulse_by_index ( string hostname, int index, iface_enum if
} }
else else
{ {
wlog_throttled ( node_ptr->unexpected_pulse_log_throttle, 200, "%s is not being monitored\n", hostname.c_str()); wlog_throttled ( node_ptr->unexpected_pulse_log_throttle, 200,
"%s is not being monitored\n",
hostname.c_str());
rc = PASS; rc = PASS;
} }
} }
else else
{ {
rc = remPulse_by_name ( hostname, iface, clear_b2b_misses_count, flags ); rc = remPulse_by_name ( hostname, iface, clear_b2b_misses_count, flags );
wlog_throttled ( node_ptr->lookup_mismatch_log_throttle, 200, "%s rri lookup mismatch (%s:%d) ; %s\n", hostname.c_str(), node_ptr->hostname.c_str(), index, rc ? "" : "removed by hostname" ); wlog_throttled ( node_ptr->lookup_mismatch_log_throttle, 200,
"%s rri lookup mismatch (%s:%d) ; %s\n",
hostname.c_str(),
node_ptr->hostname.c_str(),
index, rc ? "" : "removed by hostname" );
return (rc); return (rc);
} }
} }
@ -7984,13 +8021,6 @@ int nodeLinkClass::remPulse ( struct node * node_ptr, iface_enum iface, bool cle
* By index does not require a lookup whereas hostname does */ * By index does not require a lookup whereas hostname does */
int nodeLinkClass::remove_pulse ( string & hostname, iface_enum iface, int index, unsigned int flags ) int nodeLinkClass::remove_pulse ( string & hostname, iface_enum iface, int index, unsigned int flags )
{ {
/* TODO: consider removing this check */
if ( hostname == "localhost" )
{
/* localhost is not a supported hostname and indicates
* an unconfigured host response ; return the ignore response */
return(ENXIO);
}
if ( index ) if ( index )
{ {
int rc = remPulse_by_index ( hostname, index , iface, true , flags ); int rc = remPulse_by_index ( hostname, index , iface, true , flags );
@ -8001,8 +8031,11 @@ int nodeLinkClass::remove_pulse ( string & hostname, iface_enum iface, int index
default: mlog ("%s RRI Miss (rri:%d) (rc:%d)\n", hostname.c_str(), index, rc ); default: mlog ("%s RRI Miss (rri:%d) (rc:%d)\n", hostname.c_str(), index, rc );
} }
} }
else /* fast lookup not supported for hostnames longer than 31
* chars so in those cases don't do Not Offering RRI log */
if ( hostname.length() < MAX_CHARS_HOSTNAME_32 )
{ {
get_hbs_monitor_state ( hostname, iface ) ;
} }
return ( remPulse_by_name ( hostname , iface, true, flags )); return ( remPulse_by_name ( hostname , iface, true, flags ));
} }

View File

@ -1331,6 +1331,7 @@ public:
string my_hostname ; /**< */ string my_hostname ; /**< */
string my_local_ip ; /**< Primary IP address */ string my_local_ip ; /**< Primary IP address */
string my_float_ip ; /**< Secondary (floating) IP address */ string my_float_ip ; /**< Secondary (floating) IP address */
string my_clstr_ip ; /**< Cluster network IP address */
/********* New Public Constructs for IPMI Comamnd Handling ***********/ /********* New Public Constructs for IPMI Comamnd Handling ***********/
@ -1371,7 +1372,7 @@ public:
int set_clstr_hostaddr ( string & hostname, string & ip ); int set_clstr_hostaddr ( string & hostname, string & ip );
/** get hostname for any hostname */ /** get hostname for any hostname */
string get_hostname ( string & hostaddr ); string get_hostname ( string hostaddr );
/******************************/ /******************************/
/* NODE TYPE Member Functions */ /* NODE TYPE Member Functions */

View File

@ -41,7 +41,7 @@ using namespace std;
#include "hbsBase.h" /* Heartbeat Base Header File */ #include "hbsBase.h" /* Heartbeat Base Header File */
#include "hbsAlarm.h" /* for ... hbsAlarm_clear_all */ #include "hbsAlarm.h" /* for ... hbsAlarm_clear_all */
#include "alarm.h" /* for ... alarm send message to mtcalarmd */ #include "alarm.h" /* for ... alarm send message to mtcalarmd */
#include "jsonUtil.h" /* for ... jsonUtil_get_key_val */ #include "jsonUtil.h" /* for ... jsonUtil_get_key_val */
/************************************************************** /**************************************************************
* Implementation Structure * Implementation Structure
@ -414,39 +414,6 @@ int daemon_configure ( void )
ilog("Mgmnt Port : %d (rx)", hbs_config.hbs_agent_mgmnt_port ); ilog("Mgmnt Port : %d (rx)", hbs_config.hbs_agent_mgmnt_port );
ilog("Mgmnt Port : %d (tx)\n", hbs_config.hbs_client_mgmnt_port ); ilog("Mgmnt Port : %d (tx)\n", hbs_config.hbs_client_mgmnt_port );
/* Fetch the cluster-host interface name.
* calls daemon_get_iface_master inside so the
* aggrigated name is returned if it exists */
get_clstr_iface (&hbs_config.clstr_iface );
if ( strlen(hbs_config.clstr_iface) )
{
if (!strcmp(hbs_config.clstr_iface, hbs_config.mgmnt_iface))
{
hbsInv.clstr_network_provisioned = false ;
}
else
{
hbsInv.clstr_network_provisioned = true ;
ilog ("Clstr Name : %s", hbs_config.clstr_iface );
ilog ("Clstr Port : %d (rx)", hbs_config.hbs_agent_clstr_port );
ilog ("Clstr Port : %d (tx)", hbs_config.hbs_client_clstr_port );
}
}
ilog("Command Port: %d (rx)\n", hbs_config.mtc_to_hbs_cmd_port );
ilog("Event Port : %d (tx)\n", hbs_config.hbs_to_mtc_event_port );
ilog("Alarm Port : %d (tx)\n", hbs_config.mtcalarm_req_port );
hbsInv.hbs_state_change = true ;
/* pull in the degrade only config option */
hbsInv.clstr_degrade_only = hbs_config.clstr_degrade_only ;
if ( hbsInv.hbs_degrade_threshold >= hbsInv.hbs_failure_threshold )
{
wlog ("Degrade threshold should be smaller than Failure threshold\n");
wlog ("Heartbeat 'degrade' state disabled ; see %s\n", MTCE_CONF_FILE);
}
for ( ;; ) for ( ;; )
{ {
get_ip_addresses ( hbsInv.my_hostname, hbsInv.my_local_ip , hbsInv.my_float_ip ); get_ip_addresses ( hbsInv.my_hostname, hbsInv.my_local_ip , hbsInv.my_float_ip );
@ -464,6 +431,39 @@ int daemon_configure ( void )
break ; break ;
} }
} }
/* Fetch the cluster-host interface name.
* calls daemon_get_iface_master inside so the
* aggrigated name is returned if it exists */
get_clstr_iface (&hbs_config.clstr_iface );
if ( strlen(hbs_config.clstr_iface) )
{
int rc = get_iface_address ( hbs_config.clstr_iface,
hbsInv.my_clstr_ip, false );
if ( rc )
{
elog ("get Clstr IP address failed '%s' (%d:%d:%m)\n",
hbs_config.clstr_iface, rc, errno );
}
else
{
ilog ("Clstr Addr : %s\n", hbsInv.my_clstr_ip.c_str());
}
if (!strcmp(hbs_config.clstr_iface, hbs_config.mgmnt_iface))
{
hbsInv.clstr_network_provisioned = false ;
}
else
{
hbsInv.clstr_network_provisioned = true ;
ilog ("Clstr Name : %s", hbs_config.clstr_iface );
ilog ("Clstr Port : %d (rx)", hbs_config.hbs_agent_clstr_port );
ilog ("Clstr Port : %d (tx)", hbs_config.hbs_client_clstr_port );
}
}
ilog("Command Port: %d (rx)\n", hbs_config.mtc_to_hbs_cmd_port );
ilog("Event Port : %d (tx)\n", hbs_config.hbs_to_mtc_event_port );
ilog("Alarm Port : %d (tx)\n", hbs_config.mtcalarm_req_port );
/* Set Controller Activity State */ /* Set Controller Activity State */
hbs_config.active = daemon_get_run_option ("active") ; hbs_config.active = daemon_get_run_option ("active") ;
@ -476,6 +476,19 @@ int daemon_configure ( void )
else else
hbsInv.set_activity_state ( false ); hbsInv.set_activity_state ( false );
/* Start assuming a change */
hbsInv.hbs_state_change = true ;
/* pull in the degrade only config option */
hbsInv.clstr_degrade_only = hbs_config.clstr_degrade_only ;
if ( hbsInv.hbs_degrade_threshold >= hbsInv.hbs_failure_threshold )
{
wlog ("Degrade threshold should be smaller than Failure threshold\n");
wlog ("Heartbeat 'degrade' state disabled ; see %s\n", MTCE_CONF_FILE);
}
return (PASS); return (PASS);
} }
@ -888,9 +901,6 @@ int hbs_pulse_request ( iface_enum iface,
int bytes = 0 ; int bytes = 0 ;
if ( hbs_sock.tx_sock[iface] ) if ( hbs_sock.tx_sock[iface] )
{ {
// int unused_networks = 0 ;
memset ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE], 0, MAX_CHARS_HOSTNAME );
/* Add message version - 0 -> 1 with the acction of cluster information */ /* Add message version - 0 -> 1 with the acction of cluster information */
hbs_sock.tx_mesg[iface].v = HBS_MESSAGE_VERSION ; hbs_sock.tx_mesg[iface].v = HBS_MESSAGE_VERSION ;
@ -903,14 +913,21 @@ int hbs_pulse_request ( iface_enum iface,
/* Add this controller's lookup_clue /* Add this controller's lookup_clue
* ... aka RRI (Resource Reference Index) */ * ... aka RRI (Resource Reference Index) */
/* Fast lookup clue supported for hostnames less than 32 bytes */
memset ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE], 0, MAX_CHARS_HOSTNAME_32 );
if (( lookup_clue ) && if (( lookup_clue ) &&
( hostname_clue.length() <= MAX_CHARS_HOSTNAME )) ( hostname_clue.length() < MAX_CHARS_HOSTNAME_32 ))
{ {
hbs_sock.tx_mesg[iface].c = lookup_clue ; hbs_sock.tx_mesg[iface].c = lookup_clue ;
memcpy ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE], memcpy ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE],
hostname_clue.data(), hostname_clue.data(),
hostname_clue.length()); hostname_clue.length());
} }
else
{
hbs_sock.tx_mesg[iface].c = 0;
}
/* Append the cluster info to the pulse request */ /* Append the cluster info to the pulse request */
hbs_cluster_append(hbs_sock.tx_mesg[iface]) ; hbs_cluster_append(hbs_sock.tx_mesg[iface]) ;
@ -973,19 +990,6 @@ hbs_pulse_request_out:
return (PASS); return (PASS);
} }
/**
 * Extract the hostname that follows the first ':' in a pulse message.
 *
 * Leading whitespace after the ':' is skipped and the hostname ends at
 * the next whitespace character or the end of the string.
 *
 * @param msg_ptr  null terminated pulse message text ; may be NULL
 *
 * @return the hostname, truncated to at most 31 characters, or an empty
 *         string if msg_ptr is NULL, holds no ':' separator, or has no
 *         text after the separator.
 */
string get_hostname_from_pulse ( char * msg_ptr )
{
    /* sized to match the "%31s" conversion below, which stores at most
     * 31 characters plus the null terminator */
    char temp [32] ;
    string hostname ;

    memset ( temp, 0 , sizeof(temp) );

    /* nothing to parse */
    if ( msg_ptr == NULL )
        return (hostname);

    /* without a separator there is no hostname to find ; stepping past
     * a NULL search result would read from an invalid address */
    char * str_ptr = strchr ( msg_ptr, ':' );
    if ( str_ptr == NULL )
        return (hostname);

    /* temp stays all zeros if the scan matches nothing */
    if ( sscanf ( str_ptr + 1, "%31s", &temp[0] ) == 1 )
        hostname = temp ;

    return (hostname);
}
int _pulse_receive ( iface_enum iface , unsigned int seq_num ) int _pulse_receive ( iface_enum iface , unsigned int seq_num )
{ {
int bytes = 0 ; int bytes = 0 ;
@ -1022,9 +1026,9 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
// (hbs_sock.rx_mesg[iface].f & CTRLX_MASK ) >> CTRLX_BIT); // (hbs_sock.rx_mesg[iface].f & CTRLX_MASK ) >> CTRLX_BIT);
continue ; continue ;
} }
mlog ("%s Pulse Rsp: (%d) %s:%d: s:%d f:%x [%-27s] RRI:%d\n", mlog ("%s Pulse Rsp: (%d) from:%s:%d: s:%d flags:%x [%-27s] RRI:%d\n",
get_iface_name_str(iface), bytes, get_iface_name_str(iface), bytes,
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), hbs_sock.rx_sock[iface]->get_src_str(),
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
hbs_sock.rx_mesg[iface].s, hbs_sock.rx_mesg[iface].s,
hbs_sock.rx_mesg[iface].f, hbs_sock.rx_mesg[iface].f,
@ -1035,7 +1039,7 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
if ( strstr ( hbs_sock.rx_mesg[iface].m, rsp_msg_header) ) if ( strstr ( hbs_sock.rx_mesg[iface].m, rsp_msg_header) )
{ {
int rc = RETRY ; int rc = RETRY ;
string hostname = get_hostname_from_pulse (&hbs_sock.rx_mesg[iface].m[0]); string hostname = hbsInv.get_hostname (hbs_sock.rx_sock[iface]->get_src_str());
#ifdef WANT_FIT_TESTING #ifdef WANT_FIT_TESTING
if ( hbs_config.testmode == 1 ) if ( hbs_config.testmode == 1 )
@ -1169,11 +1173,6 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
int send_event ( string & hostname, unsigned int event_cmd, iface_enum iface ) int send_event ( string & hostname, unsigned int event_cmd, iface_enum iface )
{ {
int bytes ;
int bytes_to_send ;
int rc = PASS ;
int retries = 0 ;
if ((hbs_sock.hbs_event_tx_sock == NULL ) || if ((hbs_sock.hbs_event_tx_sock == NULL ) ||
(hbs_sock.hbs_event_tx_sock->sock_ok() == false )) (hbs_sock.hbs_event_tx_sock->sock_ok() == false ))
{ {
@ -1192,23 +1191,10 @@ int send_event ( string & hostname, unsigned int event_cmd, iface_enum iface )
hbs_cluster_log ( hbsInv.my_hostname, "event", true ); hbs_cluster_log ( hbsInv.my_hostname, "event", true );
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_loss_header()); snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_loss_header());
} }
else if ( event_cmd == MTC_EVENT_LOOPBACK ) else if (( event_cmd == MTC_EVENT_HEARTBEAT_MINOR_SET ) ||
{ ( event_cmd == MTC_EVENT_HEARTBEAT_MINOR_CLR ) ||
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header()); ( event_cmd == MTC_EVENT_HEARTBEAT_DEGRADE_SET ) ||
} ( event_cmd == MTC_EVENT_HEARTBEAT_DEGRADE_CLR ))
else if ( event_cmd == MTC_EVENT_HEARTBEAT_MINOR_SET )
{
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header());
}
else if ( event_cmd == MTC_EVENT_HEARTBEAT_MINOR_CLR )
{
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header());
}
else if ( event_cmd == MTC_EVENT_HEARTBEAT_DEGRADE_SET )
{
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header());
}
else if ( event_cmd == MTC_EVENT_HEARTBEAT_DEGRADE_CLR )
{ {
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header()); snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header());
} }
@ -1228,38 +1214,34 @@ int send_event ( string & hostname, unsigned int event_cmd, iface_enum iface )
return ( FAIL_BAD_CASE ); return ( FAIL_BAD_CASE );
} }
/* Put the hostname in the buffer - as well */ snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME_32, "%s", hostname.data());
snprintf ( &event.buf[0] , MAX_CHARS_HOSTNAME, "%s", hostname.data());
/* TODO: obsolete this method in the future as it limits the host name lenth to 32 */
snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME, "%s", hostname.data());
event.cmd = event_cmd ; event.cmd = event_cmd ;
event.num = 1 ; event.num = 1 ;
event.parm[0] = iface ; event.parm[0] = iface ;
/* Support for 64 byte hostnames */
event.ver = MTC_CMD_FEATURE_VER__KEYVALUE_IN_BUF ;
/* Json string starts at the beginning of the buffer */
event.res = 0 ;
string buf_info = "{\"hostname\":\"" ;
buf_info.append(hostname);
buf_info.append("\",\"service\":\"");
buf_info.append(MTC_SERVICE_HBSAGENT_NAME);
buf_info.append("\"}");
/* copy the string into the buffer and add one to the length to
* accomodate for the null terminator snprintf automatically adds */
snprintf ( &event.buf[event.res], buf_info.length()+1, "%s", buf_info.data());
print_mtc_message ( hostname, MTC_CMD_TX, event, get_iface_name_str(iface) , false ); print_mtc_message ( hostname, MTC_CMD_TX, event, get_iface_name_str(iface) , false );
if ( hbs_sock.hbs_event_tx_sock->write((char*)&event, sizeof(mtc_message_type)) <= 0 )
/* remove the buffer as it is not needed for this message */
bytes_to_send = ((sizeof(mtc_message_type))-(BUF_SIZE-hostname.length())) ;
do
{ {
bytes = hbs_sock.hbs_event_tx_sock->write((char*)&event,bytes_to_send); elog ("%s failed to send event to maintenance (%d:%m)", hostname.c_str(), errno );
if ( bytes <= 0 ) return ( FAIL_TO_TRANSMIT ) ;
{ }
rc = FAIL_TO_TRANSMIT ; return PASS ;
if ( retries++ > 3 )
{
elog ("Cannot communicate with maintenance\n");
return (RETRY);
}
}
else
rc = PASS ;
} while ( bytes <= 0 ) ;
return rc ;
} }
/* The main heartbeat service loop */ /* The main heartbeat service loop */
@ -1830,13 +1812,47 @@ void daemon_service_run ( void )
bytes = hbs_sock.mtc_to_hbs_sock->read((char*)&msg,sizeof(mtc_message_type)); bytes = hbs_sock.mtc_to_hbs_sock->read((char*)&msg,sizeof(mtc_message_type));
if ( bytes > 0 ) if ( bytes > 0 )
{ {
mlog ("Received Maintenance Command (%i)\n", bytes );
mlog ("%s - cmd:0x%x\n", &msg.hdr[0], msg.cmd );
if ( !strncmp ( get_hbs_cmd_req_header(), &msg.hdr[0], MSG_HEADER_SIZE )) if ( !strncmp ( get_hbs_cmd_req_header(), &msg.hdr[0], MSG_HEADER_SIZE ))
{ {
string hostname = &msg.hdr[MSG_HEADER_SIZE] ; string hostname ;
if ( msg.cmd == MTC_CMD_ACTIVE_CTRL ) node_inv_type inv ;
node_inv_init(inv);
/* 64 byte hostname support adds a json string to
* the message buffer containing the hostname as a
* key/value pair. */
if (( msg.ver >= MTC_CMD_FEATURE_VER__KEYVALUE_IN_BUF ) &&
( msg.buf[msg.res] == '{' ))
{
if ( jsonUtil_get_key_val(&msg.buf[msg.res],
MTC_JSON_INV_NAME, hostname) == PASS )
{
inv.name = hostname ;
if (( msg.cmd == MTC_CMD_ADD_HOST ) ||
( msg.cmd == MTC_CMD_MOD_HOST ))
{
jsonUtil_get_key_val(&msg.buf[msg.res], MTC_JSON_INV_HOSTIP, inv.ip);
if ( hbsInv.clstr_network_provisioned == true )
{
jsonUtil_get_key_val(&msg.buf[msg.res], MTC_JSON_INV_CLSTRIP, inv.clstr_ip);
}
}
}
}
else if ( msg.hdr[MSG_HEADER_SIZE] != '\0' )
{
/* get hostname by legacy method,
* ... from the header */
hostname = &msg.hdr[MSG_HEADER_SIZE] ;
}
if ( hostname.empty() )
{
/* no hostname ; no action to take */
wlog ("unable to get hostname from %s command",
get_mtcNodeCommand_str(msg.cmd));
}
else if ( msg.cmd == MTC_CMD_ACTIVE_CTRL )
{ {
bool logit = false ; bool logit = false ;
if ( hostname == hbsInv.my_hostname ) if ( hostname == hbsInv.my_hostname )
@ -1876,13 +1892,12 @@ void daemon_service_run ( void )
} }
else if ( msg.cmd == MTC_CMD_ADD_HOST ) else if ( msg.cmd == MTC_CMD_ADD_HOST )
{ {
node_inv_type inv ;
node_inv_init(inv);
inv.name = hostname ;
inv.nodetype = msg.parm[0]; inv.nodetype = msg.parm[0];
hbsInv.add_heartbeat_host ( inv ) ; hbsInv.add_heartbeat_host ( inv ) ;
hostname_inventory.push_back ( hostname ); hostname_inventory.push_back ( inv.name );
ilog ("%s added to heartbeat service (%d)\n", hostname.c_str(), msg.parm[0] ); ilog ("%s added to heartbeat service (%d)\n",
inv.name.c_str(),
inv.nodetype);
/* clear any outstanding alarms on the ADD */ /* clear any outstanding alarms on the ADD */
if (( hbsInv.hbs_failure_action != HBS_FAILURE_ACTION__NONE ) && if (( hbsInv.hbs_failure_action != HBS_FAILURE_ACTION__NONE ) &&
@ -1892,6 +1907,20 @@ void daemon_service_run ( void )
hbsInv.clstr_network_provisioned ); hbsInv.clstr_network_provisioned );
} }
} }
else if ( msg.cmd == MTC_CMD_MOD_HOST )
{
inv.nodetype = msg.parm[0];
hbsInv.add_heartbeat_host ( inv ) ;
ilog ("%s modified heartbeat info [%d]\n",
inv.name.c_str(),
inv.nodetype );
/* clear any outstanding alarms on the ADD */
if ( hbsInv.hbs_failure_action != HBS_FAILURE_ACTION__NONE )
{
hbsAlarm_clear_all ( hostname, hbsInv.clstr_network_provisioned );
}
}
else if ( msg.cmd == MTC_CMD_DEL_HOST ) else if ( msg.cmd == MTC_CMD_DEL_HOST )
{ {
hbsInv.mon_host ( hostname, false, false ); hbsInv.mon_host ( hostname, false, false );

View File

@ -61,7 +61,7 @@ const char req_msg_header [HBS_HEADER_SIZE+1] = {"cgts pulse req:"};
/** Heartbeat pulse response message header content */ /** Heartbeat pulse response message header content */
const char rsp_msg_header [HBS_HEADER_SIZE+1] = {"cgts pulse rsp:"}; const char rsp_msg_header [HBS_HEADER_SIZE+1] = {"cgts pulse rsp:"};
#define HBS_MAX_MSG (HBS_HEADER_SIZE+MAX_CHARS_HOSTNAME) #define HBS_MAX_MSG (HBS_HEADER_SIZE+MAX_CHARS_HOSTNAME_32)
#define HBS_MESSAGE_VERSION (1) // 0 -> 1 with intro of cluster info #define HBS_MESSAGE_VERSION (1) // 0 -> 1 with intro of cluster info

View File

@ -875,6 +875,18 @@ int _service_pulse_request ( iface_enum iface , unsigned int flags )
} }
} }
/* Manage the Resource Reference Index (RRI) "lookup clue"
* Only supported for hostnames -lt 32 bytes */
if (( strnlen(&my_hostname[0], MAX_CHARS_HOSTNAME) < MAX_CHARS_HOSTNAME_32) &&
(!strncmp(&hbs_sock.rx_mesg[iface].m[HBS_HEADER_SIZE], &my_hostname[0], MAX_CHARS_HOSTNAME_32)))
{
if( rri[controller] != hbs_sock.rx_mesg[iface].c )
{
rri[controller] = hbs_sock.rx_mesg[iface].c ;
ilog ("Caching New RRI: %d (from controller-%d)\n", rri[controller], controller );
}
}
/* Log the received cluster info /* Log the received cluster info
* ... if the message version shows that it is supported */ * ... if the message version shows that it is supported */
if ( hbs_sock.rx_mesg[iface].v ) if ( hbs_sock.rx_mesg[iface].v )
@ -1070,7 +1082,6 @@ int hbs_send_event ( unsigned int event )
mtc_message_type msg ; mtc_message_type msg ;
int rc = FAIL_BAD_PARM ; int rc = FAIL_BAD_PARM ;
int bytes = 0 ;
memset (&msg, 0 , sizeof(mtc_message_type)); memset (&msg, 0 , sizeof(mtc_message_type));
@ -1097,22 +1108,31 @@ int hbs_send_event ( unsigned int event )
/* build the message */ /* build the message */
snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header()); snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header());
snprintf ( &msg.hdr[MSG_HEADER_SIZE], MAX_CHARS_HOSTNAME, "%s", &my_hostname[0]);
#define MAX_PROC_NAME_SIZE (64)
snprintf ( &msg.buf[0], MAX_PROC_NAME_SIZE, "%s", program_invocation_short_name);
size_t len = strlen(program_invocation_short_name);
bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len));
msg.cmd = event ; msg.cmd = event ;
msg.ver = MTC_CMD_FEATURE_VER__KEYVALUE_IN_BUF ;
string event_info = "{\"" ;
event_info.append(MTC_JSON_INV_NAME);
event_info.append("\":\"");
event_info.append(my_hostname);
event_info.append("\",\"");
event_info.append(MTC_JSON_SERVICE);
event_info.append("\":\"");
event_info.append(MTC_SERVICE_HBSCLIENT_NAME );
event_info.append( "\"}");
size_t len = event_info.length()+1 ;
snprintf ( &msg.buf[0], len, "%s", event_info.data());
int bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len));
if (( hbs_sock.hbs_ready_tx_sock ) && if (( hbs_sock.hbs_ready_tx_sock ) &&
( hbs_sock.hbs_ready_tx_sock->sock_ok() == true )) ( hbs_sock.hbs_ready_tx_sock->sock_ok() == true ))
{ {
mlog ("Ready message\n"); mlog ("%s sending ready event\n", my_hostname );
if ((rc = hbs_sock.hbs_ready_tx_sock->write((char*)&msg.hdr[0], bytes))!= bytes ) if ((rc = hbs_sock.hbs_ready_tx_sock->write((char*)&msg.hdr[0], bytes))!= bytes )
{ {
elog ("Ready message send failed (%d) (%d:%s)\n", rc, errno, strerror(errno) ); elog ("... ready event send failed (%d) (%d:%s)\n", rc, errno, strerror(errno) );
rc = FAIL_SOCKET_SENDTO ; rc = FAIL_SOCKET_SENDTO ;
} }
else else

View File

@ -357,7 +357,7 @@ void daemon_service_run ( void )
ilog ("Transmitting: Monitor READY Event\n" ); ilog ("Transmitting: Monitor READY Event\n" );
do do
{ {
rc = hwmon_send_event ( hwmon_ctrl.my_hostname, MTC_EVENT_MONITOR_READY, "hwmond" ); rc = hwmon_send_event ( hwmon_ctrl.my_hostname, MTC_EVENT_MONITOR_READY, "" );
if ( rc == RETRY ) if ( rc == RETRY )
{ {
mtcWait_secs ( 2 ); mtcWait_secs ( 2 );

View File

@ -131,9 +131,7 @@ int mtclogd_tx_port_init ( void )
int hwmon_send_event ( string hostname, unsigned int event_code , const char * sensor_ptr ) int hwmon_send_event ( string hostname, unsigned int event_code , const char * sensor_ptr )
{ {
mtc_message_type event ; mtc_message_type event ;
int rc = PASS ;
int rc = FAIL ;
int bytes = 0 ;
memset (&event, 0 , sizeof(mtc_message_type)); memset (&event, 0 , sizeof(mtc_message_type));
@ -149,59 +147,55 @@ int hwmon_send_event ( string hostname, unsigned int event_code , const char * s
( event_code == MTC_DEGRADE_RAISE ) || ( event_code == MTC_DEGRADE_RAISE ) ||
( event_code == MTC_DEGRADE_CLEAR )) ( event_code == MTC_DEGRADE_CLEAR ))
{ {
mlog ("%s sending '%s' event to mtcAgent for '%s'\n", string event_info = "" ;
hostname.c_str(),
mlog ("%s sending '%s' event to mtcAgent for '%s'\n",
hostname.c_str(),
get_event_str(event_code).c_str(), get_event_str(event_code).c_str(),
sensor_ptr ); sensor_ptr );
snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header()); snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header());
snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME , "%s", hostname.data()); /* Limit the size of the hostname in the header to 32 bytes
* - legacy support */
snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME_32, "%s", hostname.data());
/* Add support for up to 64 byte hostnames as a
* json string in the buffer. */
event.ver = MTC_CMD_FEATURE_VER__KEYVALUE_IN_BUF ;
event_info.append( "{\"service\":\"hwmond\",\"hostname\":\"" ) ;
event_info.append( hostname );
if ( sensor_ptr ) if ( sensor_ptr )
{ {
size_t len = strnlen ( sensor_ptr, MAX_SENSOR_NAME_LEN ); event_info.append( "\",\"sensor\":\"" ) ;
event_info.append( sensor_ptr );
/* We don't use the buffer for hwmon events to remove it from the size */ }
bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len)); event_info.append( "\"}");
snprintf ( &event.buf[event.res] , event_info.length()+1, "%s", event_info.data());
snprintf ( &event.buf[0], MAX_SENSOR_NAME_LEN, "%s", sensor_ptr ); /* Update the event code */
event.cmd = event_code ;
/* Send the event */
rc = hwmon_sock.event_sock->write((char*)&event.hdr[0],sizeof(mtc_message_type));
if ( rc )
{
rc = PASS ;
}
else
{
elog ("event send to %s:%d failed (%d:%d:%m)",
hwmon_sock.event_sock->get_dst_str(),
hwmon_sock.event_sock->get_dst_addr()->getPort(), rc, errno);
rc = FAIL_SOCKET_SENDTO ;
} }
} }
else if ( event_code == MTC_EVENT_LOOPBACK )
{
snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME , "%s", hostname.data());
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_loopback_header());
/* We don't use the buffer for hwmon events to remove it from the size */
bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
}
else else
{ {
elog ("Unsupported process monitor event (%d)\n", event_code ); elog ("Unsupported hardware monitor event (%d)\n", event_code );
return ( FAIL_BAD_PARM ); rc = FAIL_BAD_PARM ;
}
/* Update the event code */
event.cmd = event_code ;
/* Send the event */
if ((rc = hwmon_sock.event_sock->write((char*)&event.hdr[0],bytes)) != bytes )
{
elog ("Message send failed. (%d)\n", rc);
elog ("Message: %d bytes to <%s:%d>\n", bytes,
hwmon_sock.event_sock->get_dst_str(),
hwmon_sock.event_sock->get_dst_addr()->getPort());
rc = FAIL_SOCKET_SENDTO ;
}
else
{
mlog ("Sending '%s' Event with %d bytes to %s:%d\n",
get_event_str (event.cmd).c_str(), bytes,
hwmon_sock.event_sock->get_dst_str(),
hwmon_sock.event_sock->get_dst_addr()->getPort());
print_mtc_message (&event);
rc = PASS ;
} }
print_mtc_message ( hostname, MTC_CMD_TX, event, get_iface_name_str(MGMNT_INTERFACE), rc );
return rc ; return rc ;
} }
@ -213,7 +207,7 @@ int hwmon_service_inbox ( void )
int rc = PASS ; int rc = PASS ;
/* clean the rx/tx buffer */ /* clean the rx/tx buffer */
memset ((void*)&msg,0,sizeof(mtc_message_type)); memset ((void*)&msg,0,sizeof(mtc_message_type));
bytes = hwmon_sock.cmd_sock->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); bytes = hwmon_sock.cmd_sock->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
if( bytes <= 0 ) if( bytes <= 0 )
@ -238,8 +232,6 @@ int hwmon_service_inbox ( void )
hwmon_sock.cmd_sock->get_src_str(), hwmon_sock.cmd_sock->get_src_str(),
hwmon_sock.cmd_sock->get_dst_addr()->getPort()); hwmon_sock.cmd_sock->get_dst_addr()->getPort());
print_mtc_message ( &msg );
if ( !strnlen ( &msg.hdr[MSG_HEADER_SIZE], MAX_CHARS_HOSTNAME )) if ( !strnlen ( &msg.hdr[MSG_HEADER_SIZE], MAX_CHARS_HOSTNAME ))
{ {
wlog ("Mtce message (%x) did not specify target hostname\n", msg.cmd ); wlog ("Mtce message (%x) did not specify target hostname\n", msg.cmd );
@ -254,6 +246,7 @@ int hwmon_service_inbox ( void )
wlog ("%s failed to parse host info\n", inv.name.c_str()); wlog ("%s failed to parse host info\n", inv.name.c_str());
return (FAIL_KEY_VALUE_PARSE); return (FAIL_KEY_VALUE_PARSE);
} }
print_mtc_message ( inv.name, MTC_CMD_RX, msg, get_iface_name_str(MGMNT_IFACE) , false);
rc = PASS; rc = PASS;
if ( msg.cmd == MTC_CMD_ADD_HOST ) if ( msg.cmd == MTC_CMD_ADD_HOST )
@ -269,7 +262,6 @@ int hwmon_service_inbox ( void )
{ {
mlog ("%s add host message\n", inv.name.c_str()); mlog ("%s add host message\n", inv.name.c_str());
} }
} }
else if ( msg.cmd == MTC_CMD_DEL_HOST ) else if ( msg.cmd == MTC_CMD_DEL_HOST )
{ {
@ -306,17 +298,13 @@ int hwmon_service_inbox ( void )
{ {
mlog ("%s query host message - NOT IMPLEMENTED YET !!!\n", inv.name.c_str()); mlog ("%s query host message - NOT IMPLEMENTED YET !!!\n", inv.name.c_str());
} }
else if ( msg.cmd == MTC_CMD_LOOPBACK )
{
mlog ("Loopback command received\n");
}
else else
{ {
rc = FAIL_BAD_PARM ; rc = FAIL_BAD_PARM ;
elog ( "Unsupported maintenance command (%d)\n", msg.cmd ); elog ( "Unsupported maintenance command (%d)\n", msg.cmd );
} }
} }
else else
{ {
elog ("Unsupported Message\n"); elog ("Unsupported Message\n");
print_mtc_message ( &msg ) ; print_mtc_message ( &msg ) ;
@ -324,15 +312,14 @@ int hwmon_service_inbox ( void )
} }
#ifdef WANT_COMMAND_RESPONSE #ifdef WANT_COMMAND_RESPONSE
/* TODO: Test and enable reply message */ /* TODO: Test and enable reply message */
// snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header()); // snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header());
if ( rc == PASS ) if ( rc == PASS )
{ {
bytes = sizeof(mtc_message_type)-BUF_SIZE; bytes = sizeof(mtc_message_type)-BUF_SIZE;
rc = sendto( hwmon_sock.mtc_client_tx_sock, rc = sendto( hwmon_sock.mtc_client_tx_sock,
(char*)&msg.hdr[0], bytes , 0, (char*)&msg.hdr[0], bytes , 0,
(struct sockaddr *) &hwmon_sock.agent_addr, (struct sockaddr *) &hwmon_sock.agent_addr,
sizeof(hwmon_sock.agent_addr)); sizeof(hwmon_sock.agent_addr));
if (rc != bytes ) if (rc != bytes )
{ {
@ -345,9 +332,8 @@ int hwmon_service_inbox ( void )
{ {
mlog ("Response: <%s> to %s:%d\n", &msg.hdr[0], mlog ("Response: <%s> to %s:%d\n", &msg.hdr[0],
inet_ntoa(hwmon_sock.client_addr.sin_addr), inet_ntoa(hwmon_sock.client_addr.sin_addr),
ntohs(hwmon_sock.agent_addr.sin_port)); ntohs(hwmon_sock.agent_addr.sin_port));
} }
fflush(stdout);
} }
#endif #endif
return (rc); return (rc);

View File

@ -61,10 +61,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
int bytes = 0 ; int bytes = 0 ;
mtc_message_type msg ; mtc_message_type msg ;
int rc = FAIL ; int rc = FAIL ;
ctrl_type * ctrl_ptr = get_ctrl_ptr() ;
if ( interface == CLSTR_INTERFACE ) if ( interface == CLSTR_INTERFACE )
{ {
if ( ! get_ctrl_ptr()->clstr_iface_provisioned ) if ( ! ctrl_ptr->clstr_iface_provisioned )
{ {
wlog ("cannot receive from unprovisioned %s interface\n", wlog ("cannot receive from unprovisioned %s interface\n",
get_iface_name_str(interface) ); get_iface_name_str(interface) );
@ -74,13 +75,14 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
/* clean the rx/tx buffer */ /* clean the rx/tx buffer */
memset ((void*)&msg,0,sizeof(mtc_message_type)); memset ((void*)&msg,0,sizeof(mtc_message_type));
string hostaddr = "" ;
if ( interface == MGMNT_INTERFACE ) if ( interface == MGMNT_INTERFACE )
{ {
if (( sock_ptr->mtc_client_rx_socket ) && if (( sock_ptr->mtc_client_rx_socket ) &&
( sock_ptr->mtc_client_rx_socket->sock_ok() == true )) ( sock_ptr->mtc_client_rx_socket->sock_ok() == true ))
{ {
bytes = sock_ptr->mtc_client_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); rc = sock_ptr->mtc_client_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
hostaddr = sock_ptr->mtc_client_rx_socket->get_src_str();
} }
else else
{ {
@ -93,7 +95,8 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if (( sock_ptr->mtc_client_clstr_rx_socket ) && if (( sock_ptr->mtc_client_clstr_rx_socket ) &&
( sock_ptr->mtc_client_clstr_rx_socket->sock_ok() == true )) ( sock_ptr->mtc_client_clstr_rx_socket->sock_ok() == true ))
{ {
bytes = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); rc = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
hostaddr = sock_ptr->mtc_client_clstr_rx_socket->get_src_str();
} }
else else
{ {
@ -102,7 +105,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
} }
} }
if( bytes <= 0 ) if( rc <= 0 )
{ {
if ( ( errno == EINTR ) || ( errno == EAGAIN )) if ( ( errno == EINTR ) || ( errno == EAGAIN ))
{ {
@ -113,24 +116,34 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
return (FAIL_TO_RECEIVE); return (FAIL_TO_RECEIVE);
} }
} }
rc = PASS ;
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, get_iface_name_str(interface), false ); bool self = false ;
if (( hostaddr == ctrl_ptr->address ) ||
( hostaddr == ctrl_ptr->address_clstr ))
{
self = true ;
}
/* Message version greater than zero have the hosts management /* Message version greater than zero have the hosts management
* mac address appended to the header string */ * mac address appended to the header string */
if ( msg.ver >= MTC_CMD_FEATURE_VER__MACADDR_IN_CMD ) if (( !self ) && ( msg.ver >= MTC_CMD_FEATURE_VER__MACADDR_IN_CMD ))
{ {
/* the minus 1 is to back up from the null char that is accounted for in the hearder size */ /* the minus 1 is to back up from the null char that is accounted for in the hearder size */
if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], get_ctrl_ptr()->macaddr.data(), MSG_HEADER_SIZE )) if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], ctrl_ptr->macaddr.data(), MSG_HEADER_SIZE ))
{ {
wlog ("%s command not for this host (exp:%s det:%s) ; ignoring ...\n", wlog ("%s command not for this host (exp:%s det:%s) ; ignoring ...\n",
get_mtcNodeCommand_str(msg.cmd), get_mtcNodeCommand_str(msg.cmd),
get_ctrl_ptr()->macaddr.c_str(), ctrl_ptr->macaddr.c_str(),
&msg.hdr[MSG_HEADER_SIZE-1]); &msg.hdr[MSG_HEADER_SIZE-1]);
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, get_iface_name_str(interface), true ); rc = FAIL_INVALID_DATA ;
return (FAIL_INVALID_DATA);
} }
} }
print_mtc_message ( hostaddr, MTC_CMD_RX, msg, get_iface_name_str(interface), rc );
if ( rc )
return rc;
/* Check for response messages */ /* Check for response messages */
if ( strstr ( &msg.hdr[0], get_cmd_req_msg_header() ) ) if ( strstr ( &msg.hdr[0], get_cmd_req_msg_header() ) )
{ {

File diff suppressed because it is too large Load Diff

View File

@ -418,8 +418,7 @@ string mtcHttpSvr_inv_req ( char * request_ptr,
{ {
node_inv_type inv ; node_inv_type inv ;
node_inv_init (inv); node_inv_init (inv);
dlog ("%s %s : '%s'\n", obj_ptr->my_hostname.c_str(), key.c_str(), value.c_str()) ;
ilog ("%s %s : '%s'\n", obj_ptr->my_hostname.c_str(), key.c_str(), value.c_str()) ;
rc = jsonUtil_load_host ( request_ptr, inv ); rc = jsonUtil_load_host ( request_ptr, inv );
if ( rc == PASS ) if ( rc == PASS )
@ -451,6 +450,7 @@ string mtcHttpSvr_inv_req ( char * request_ptr,
*/ */
if ( rc == RETRY ) if ( rc == RETRY )
{ {
ilog ("%s Modify Operation\n", inv.name.c_str());
rc = obj_ptr->mod_host ( inv ); rc = obj_ptr->mod_host ( inv );
} }

View File

@ -289,7 +289,7 @@ void setup_mgmnt_rx_socket ( void )
ilog("Mgmnt iface : %s\n", ctrl.mgmnt_iface.c_str() ); ilog("Mgmnt iface : %s\n", ctrl.mgmnt_iface.c_str() );
get_iface_macaddr ( ctrl.mgmnt_iface.data(), ctrl.macaddr ); get_iface_macaddr ( ctrl.mgmnt_iface.data(), ctrl.macaddr );
get_iface_address ( ctrl.mgmnt_iface.data(), ctrl.address , true ); get_iface_address ( ctrl.mgmnt_iface.data(), ctrl.address , true );
get_hostname ( &ctrl.hostname[0], MAX_HOST_NAME_SIZE ); get_hostname ( &ctrl.hostname[0], MAX_HOST_NAME_SIZE );
_close_mgmnt_rx_socket (); _close_mgmnt_rx_socket ();
mtc_sock.mtc_client_rx_socket = new msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false ); mtc_sock.mtc_client_rx_socket = new msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false );

View File

@ -649,15 +649,13 @@ int daemon_configure ( void )
ilog("Inv Port : %d (tx)\n", mtc_config.sysinv_api_port ); ilog("Inv Port : %d (tx)\n", mtc_config.sysinv_api_port );
ilog("Inv Address : %s (tx)\n", mtc_config.sysinv_api_bind_ip ); ilog("Inv Address : %s (tx)\n", mtc_config.sysinv_api_bind_ip );
ilog("Inv Event : %d (rx)\n", mtc_config.inv_event_port ); ilog("Inv Event : %d (rx)\n", mtc_config.inv_event_port );
ilog("Keystone Port: %d (rx)\n", mtc_config.keystone_port ); ilog("Keystone Prt: %d (rx)\n", mtc_config.keystone_port );
ilog("Mtce Logger : %d (tx)\n", mtc_config.daemon_log_port ); ilog("Mtce Logger : %d (tx)\n", mtc_config.daemon_log_port );
ilog("nfv-vim-api : %d (port)\n", mtc_config.vim_cmd_port ); ilog("nfv-vim-api : %d (port)\n", mtc_config.vim_cmd_port );
ilog("hbsAgent : %d (port)\n", mtc_config.mtc_to_hbs_cmd_port ); ilog("hbsAgent : %d (port)\n", mtc_config.mtc_to_hbs_cmd_port );
ilog("guestAgent : %d (port)\n", mtc_config.mtc_to_guest_cmd_port ); ilog("guestAgent : %d (port)\n", mtc_config.mtc_to_guest_cmd_port );
ilog("hwmond : %d (port)\n", mtc_config.hwmon_cmd_port ); ilog("hwmond : %d (port)\n", mtc_config.hwmon_cmd_port );
ilog("auth_host : %s \n", mtc_config.keystone_auth_host ); ilog("auth_host : %s \n", mtc_config.keystone_auth_host );
ilog("Barbican Port: %d (rx)\n", mtc_config.barbican_api_port );
ilog("Barbican Address : %s (tx)\n", mtc_config.barbican_api_host );
/* log system wide service based auto recovery control values */ /* log system wide service based auto recovery control values */
ilog("AR Config : %d (threshold) %d sec (retry interval)", ilog("AR Config : %d (threshold) %d sec (retry interval)",

View File

@ -423,7 +423,7 @@ void load_processes ( void )
} }
} }
pmon_send_event ( MTC_EVENT_PMON_CLEAR, &process_config[0] ) ; pmon_send_event ( MTC_EVENT_PMON_CLEAR, NULL ) ;
ilog ("Registering Processes With Kernel\n"); ilog ("Registering Processes With Kernel\n");
ilog ("---------------------------------------------------------------\n"); ilog ("---------------------------------------------------------------\n");
@ -1945,7 +1945,7 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
if ( want_degrade_clear () == true ) if ( want_degrade_clear () == true )
{ {
dlog ("sending degrade clear\n"); dlog ("sending degrade clear\n");
pmon_send_event ( MTC_EVENT_PMON_CLEAR, &process_config[0] ) ; pmon_send_event ( MTC_EVENT_PMON_CLEAR, NULL ) ;
} }
else else
{ {

View File

@ -249,116 +249,87 @@ int pmon_send_pulse ( void )
int pmon_send_event ( unsigned int event_cmd , process_config_type * ptr ) int pmon_send_event ( unsigned int event_cmd , process_config_type * ptr )
{ {
mtc_message_type event ; mtc_message_type event ;
int rc = PASS ; int rc = PASS ;
int bytes = 0 ;
/* Don't report events while we are in reset mode */ /* Don't report events while we are in reset mode */
if ( daemon_is_file_present ( NODE_RESET_FILE ) ) if ( daemon_is_file_present ( NODE_RESET_FILE ) )
return ( PASS ); return ( PASS );
pmon_ctrl_type * ctrl_ptr = get_ctrl_ptr () ;
memset (&event, 0 , sizeof(mtc_message_type)); memset (&event, 0 , sizeof(mtc_message_type));
snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header());
if (( event_cmd == MTC_EVENT_MONITOR_READY) || event.cmd = event_cmd ;
( event_cmd == MTC_EVENT_PMON_LOG) || event.ver = MTC_CMD_FEATURE_VER__KEYVALUE_IN_BUF ;
( event_cmd == MTC_EVENT_PMON_MINOR) || event.num = 1 ;
( event_cmd == MTC_EVENT_PMON_MAJOR) || event.parm[0] = ctrl_ptr->nodetype ; /* default to node type */
( event_cmd == MTC_EVENT_PMON_CRIT ) ||
( event_cmd == MTC_EVENT_PMON_CLEAR )) string event_info = "{\"" ;
event_info.append(MTC_JSON_INV_NAME);
event_info.append("\":\"");
event_info.append(ctrl_ptr->my_hostname);
event_info.append("\",\"");
event_info.append(MTC_JSON_SERVICE);
event_info.append("\":\"");
event_info.append(MTC_SERVICE_PMOND_NAME );
if ( ( ptr != NULL ) && ( ptr->process ) )
{ {
pmon_ctrl_type * ctrl_ptr = get_ctrl_ptr () ; event_info.append("\",\"");
event_info.append(MTC_JSON_PROCESS);
event_info.append("\":\"");
event_info.append(ptr->process);
}
event_info.append( "\"}");
snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header()); size_t len = event_info.length()+1 ;
snprintf ( &event.buf[0], len, "%s", event_info.data());
int bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len));
/* Set the version/revision for PMON messages. */ /* override with subfunction case */
event.ver = MTC_MSG_VERSION_15_12_GA_PMON ; if (( ptr != NULL ) &&
event.rev = MTC_MSG_REVISION_15_12_GA_PMON ; ( ctrl_ptr->subfunction != 0 ) &&
( ctrl_ptr->subfunction != ctrl_ptr->function ))
if ( ptr->process ) {
if ( ptr->subfunction != NULL )
{ {
/* We don't use the buffer for pmon events to remove it from the size */ string temp = ptr->subfunction ;
bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-MAX_FILENAME_LEN)); event.parm[0]= get_host_function_mask (temp) ;
if ( ( event_cmd == MTC_EVENT_PMON_MINOR) ||
snprintf( &event.buf[0], MAX_PROCESS_NAME_LEN, "%s", ptr->process ); ( event_cmd == MTC_EVENT_PMON_MAJOR) ||
( event_cmd == MTC_EVENT_PMON_LOG) ||
/* Put the process function in parm zero of the event message */ ( event_cmd == MTC_EVENT_PMON_CRIT ) )
event.num = 1 ;
event.parm[0] = ctrl_ptr->nodetype ; /* default to node type */
if ( event_cmd == MTC_EVENT_PMON_CLEAR )
{ {
dlog ("pmond degrade clear\n" ); mlog ("%s process failed\n", ptr->process );
snprintf( &event.buf[0], MAX_PROCESS_NAME_LEN, "%s", "pmond" );
} }
else if (( event_cmd == MTC_EVENT_PMON_CRIT ) || else if (( event_cmd == MTC_EVENT_PMON_CLEAR ) && ( ptr->was_failed == true ))
( event_cmd == MTC_EVENT_PMON_MAJOR ))
{ {
wlog ("%s caused degrade assert\n", ptr->process ); ilog ("%s process recovered\n", ptr->process );
} ptr->was_failed = false ;
else if ( event_cmd == MTC_EVENT_PMON_MINOR )
{
slog ("degrade does not apply to minor\n" );
rc = FAIL_BAD_CASE ;
}
/* override with subfunction case */
if (( ctrl_ptr->subfunction != 0 ) &&
( ctrl_ptr->subfunction != ctrl_ptr->function ))
{
if ( ptr->subfunction != NULL )
{
string temp = ptr->subfunction ;
event.parm[0]= get_host_function_mask (temp) ;
if ( ( event_cmd == MTC_EVENT_PMON_MINOR) ||
( event_cmd == MTC_EVENT_PMON_MAJOR) ||
( event_cmd == MTC_EVENT_PMON_LOG) ||
( event_cmd == MTC_EVENT_PMON_CRIT ) )
{
mlog ("%s process failed\n", ptr->process );
}
else if (( event_cmd == MTC_EVENT_PMON_CLEAR ) && ( ptr->was_failed == true ))
{
ilog ("%s process recovered\n", ptr->process );
ptr->was_failed = false ;
}
}
} }
} }
} }
else if ( event_cmd == MTC_EVENT_LOOPBACK )
{
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_loopback_header());
/* We don't use the buffer for pmon events to remove it from the size */
bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
}
else
{
elog ("Unsupported process monitor event (%d)\n", event_cmd );
return ( FAIL_BAD_CASE );
}
event.cmd = event_cmd ;
print_mtc_message ( LOCALHOST, MTC_CMD_TX, event, get_iface_name_str(MGMNT_INTERFACE), false );
/* Send the event */ /* Send the event */
if ((rc = pmon_sock.event_sock->write((char*)&event.hdr[0], bytes)) != bytes ) if ((rc = pmon_sock.event_sock->write((char*)&event.hdr[0], bytes)) != bytes )
{ {
elog ("Message send failed. (%d)\n", rc); elog ("event message send failed (%d) (%d) (%d:%m) (%s:%d)\n",
elog ("Message: %d bytes to <%s:%d>\n", bytes, bytes, rc, errno,
pmon_sock.event_sock->get_dst_addr()->toString(), pmon_sock.event_sock->get_dst_addr()->toString(),
pmon_sock.event_sock->get_dst_addr()->getPort()); pmon_sock.event_sock->get_dst_addr()->getPort());
} }
else else
{ {
string severity = get_event_str ( event.cmd ); string severity = get_event_str ( event.cmd );
mlog ("Sending '%s' event for process '%s' to %s:%d (bytes:%d)\n", if ( ptr )
severity.c_str(), event.buf, {
pmon_sock.event_sock->get_dst_addr()->toString(), /* Only log the clear event for a specified process.
pmon_sock.event_sock->get_dst_addr()->getPort(), bytes); * Avoid logging the periodic degrade clear event. */
ilog ("%s %s sent", ctrl_ptr->my_hostname,
get_mtcNodeCommand_str(event_cmd));
}
rc = PASS ; rc = PASS ;
} }
print_mtc_message ( ctrl_ptr->my_hostname, MTC_CMD_TX, event, get_iface_name_str(MGMNT_INTERFACE), rc );
return rc ; return rc ;
} }