/*
* Copyright (c) 2013-2017 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
*/
/**
* @file
* Wind River CGCS Platform Resource Monitor Handler
*/
#include "rmon.h" /* rmon header file */
#include "rmonHttp.h" /* for rmon HTTP libEvent utilties */
#include "rmonApi.h" /* vswitch calls */
#include <sys/wait.h>
#include <time.h>
#include <signal.h>
#include <fstream>
#include <sstream>
#include <ctime>
#include <vector> /* for storing dynamic resource names */
#include <dirent.h>
#include <algorithm>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <cctype>
#include <pthread.h>
#include <linux/rtnetlink.h> /* for ... RTMGRP_LINK */
#include "nlEvent.h" /* for ... open_netlink_socket */
#include "nodeEvent.h" /* for inotify */
#include <json-c/json.h> /* for ... json-c json string parsing */
#include "jsonUtil.h"
#include "tokenUtil.h" /* for ... tokenUtil_new_token */
/* Preserve a local copy of a pointer to the control struct to
* avoid having to publish a get utility prototype into rmon.h */
static rmon_ctrl_type * _rmon_ctrl_ptr = NULL ;
static interface_resource_config_type interface_resource_config[MAX_RESOURCES] ;
static resource_config_type resource_config[MAX_RESOURCES] ;
static thinmeta_resource_config_type thinmeta_resource_config[MAX_RESOURCES] ;
static registered_clients registered_clt[MAX_CLIENTS];
static libEvent_type ceilometerEvent; // for ceilometer REST API request
static libEvent tokenEvent; // for token request
/* Used to set alarms through the FM API */
static SFmAlarmDataT alarmData;
static struct mtc_timer rmonTimer_event ;
static struct mtc_timer rmonTimer_pm ;
static struct mtc_timer rmonTimer_ntp ;
static struct mtc_timer rtimer[MAX_RESOURCES] ;
static struct mtc_timer thinmetatimer[MAX_RESOURCES] ;
static ntpStage_enum ntp_stage ; /* The stage the ntp is in within the resource handler fsm */
static int ntp_status ; /* status returned by the ntpq command */
static int ntp_child_pid ;
/* for dynamic resources */
bool modifyingResources = false;
vector<string> criticality_resource;
vector<string> dynamic_resource;
vector<string> types;
vector<string> devices;
vector<int> fs_index;
vector<string> fs_state;
/** List of config files */
std::list<string> config_files ;
std::list<string>::iterator string_iter_ptr ;
std::list<string> interface_config_files ;
/* percent or abs value for fs resources */
int fs_percent = 0;
int swact_count = 0;
/* for cpu usage */
time_t t1, t2;
int num_cpus = 0;
int num_base_cpus = 0;
int included_cpu[MAX_BASE_CPU];
static string hostUUID = "";
/* Initial cpu time */
vector<unsigned long long> cpu_time_initial;
/* Later cpu time */
vector<unsigned long long> cpu_time_later;
void save_fs_resource ( string resource_name, string criticality,
int enabled, int percent, int abs_values[3],
int alarm_type, string type, string device, int mounted );
void calculate_fs_usage( resource_config_type * ptr );
void _space_to_underscore (string & str );
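/* Context for the instance mount monitoring pthread : thread/process
 * identifiers, a running flag and the last sampled usage for the
 * resource being monitored (see rmon_hdlr_init). */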
struct thread_data
{
pid_t tid;
pid_t pid;
unsigned long long nr_switches_count;
bool thread_running;
double resource_usage;
resource_config_type * resource;
};
/* info passed to pthreads */
struct thread_data t_data;
pthread_t thread;
pthread_mutex_t lock;
/* strict memory accounting off = 0 or on = 1 */
int IS_STRICT = 0;
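/* Memory logging helpers : dump the rmon control, resource and interface
 * resource state into the in-memory log (mem_log) for debug inspection. */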
void mem_log_ctrl ( rmon_ctrl_type * ptr )
{
#define MAX_LEN 500
char str[MAX_LEN] ;
snprintf (&str[0], MAX_LEN, "%s %s %s\n",
&ptr->my_hostname[0],
ptr->my_address.c_str(),
ptr->my_macaddr.c_str() );
mem_log(str);
}
void mem_log_resource ( resource_config_type * ptr )
{
#define MAX_LEN 500
char str[MAX_LEN] ;
snprintf (&str[0], MAX_LEN, "Resource:%-15s Sev:%-8s Tries:%u Debounce:%d\n",
ptr->resource, ptr->severity, ptr->count, ptr->debounce);
mem_log(str);
}
void mem_log_interface_resource ( interface_resource_config_type * ptr )
{
#define MAX_LEN 500
char str[MAX_LEN] ;
snprintf (&str[0], MAX_LEN, "Resource:%-15s Sev:%-8s Debounce:%d\n",
ptr->resource, ptr->severity, ptr->debounce);
mem_log(str);
}
int _config_dir_load (void);
int _config_files_load (void);
const char rmonStages_str [RMON_STAGE__STAGES][32] =
{
"Handler-Init",
"Handler-Start",
"Manage-Restart",
"Monitor-Wait",
"Monitor-Resource",
"Restart-Wait",
"Ignore-Resource",
"Handler-Finish",
"Failed-Resource",
"Failed-Resource-clr",
} ;
const char ntpStages_str [NTP_STAGE__STAGES][32] =
{
"Begin",
"Execute-NTPQ",
"Execute-NTPQ-Wait",
} ;
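/* Lookup helpers : return a pointer into the static client/resource arrays
 * for the supplied index, or NULL when the index is out of range. */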
registered_clients * get_registered_clients_ptr ( int index )
{
if ( index <= _rmon_ctrl_ptr->clients )
return ( &registered_clt[index] );
return ( NULL );
}
rmon_ctrl_type * get_rmon_ctrl_ptr ()
{
return _rmon_ctrl_ptr;
}
interface_resource_config_type * get_interface_ptr ( int index )
{
if ( index <= _rmon_ctrl_ptr->interface_resources )
return ( &interface_resource_config[index] );
return ( NULL );
}
resource_config_type * get_resource_ptr ( int index )
{
if ( index >= 0 && index <= _rmon_ctrl_ptr->resources )
return ( &resource_config[index] );
return NULL;
}
/*****************************************************************************
*
* Name : get_resource_index
*
* Purpose : Get the resource's index based on the name
*
*****************************************************************************/
int get_resource_index ( const char *resource_name, int *index )
{
for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ )
{
if ( strcmp(resource_config[i].resource, resource_name) == 0)
{
*index = i;
return (PASS);
}
}
return (FAIL);
}
/*****************************************************************************
*
* Name : rmon_hdlr_fini
*
* Purpose : Clean up the resource monitor module
*
*****************************************************************************/
void rmon_hdlr_fini ( rmon_ctrl_type * ctrl_ptr )
{
for ( int i = 0 ; i < ctrl_ptr->resources ; i++ )
{
// mem_log ('\n');
mem_log_resource ( &resource_config[i] );
}
pthread_mutex_destroy(&lock);
/* Turn off inotify */
//set_inotify_close ( ctrl_ptr->fd, ctrl_ptr->wd );
}
/*****************************************************************************
*
* Name : resourceStageChange
*
* Purpose : Put a resource in the requested stage for use by the resource handler
*
*****************************************************************************/
int resourceStageChange ( resource_config_type * ptr , rmonStage_enum newStage )
{
if (( newStage < RMON_STAGE__STAGES ) &&
( ptr->stage < RMON_STAGE__STAGES ))
{
clog ("%s %s -> %s (%d->%d)\n",
ptr->resource,
rmonStages_str[ptr->stage],
rmonStages_str[newStage],
ptr->stage, newStage);
ptr->stage = newStage ;
return (PASS);
}
else
{
slog ("%s Invalid Stage (now:%d new:%d)\n",
ptr->resource, ptr->stage, newStage );
ptr->stage = RMON_STAGE__FINISH ;
return (FAIL);
}
}
/*****************************************************************************
*
* Name : ntpStageChange
*
* Purpose : Stage change handler for NTP resource
*
*****************************************************************************/
int ntpStageChange ( ntpStage_enum newStage )
{
if ((newStage < NTP_STAGE__STAGES ) &&
( ntp_stage < NTP_STAGE__STAGES ))
{
clog ("NTP %s -> %s (%d->%d)\n",
ntpStages_str[ntp_stage],
ntpStages_str[newStage],
ntp_stage, newStage);
ntp_stage = newStage ;
return (PASS);
}
else
{
slog ("NTP Invalid Stage (now:%d new:%d)\n", ntp_stage, newStage );
ntp_stage = NTP_STAGE__BEGIN ;
return (FAIL);
}
}
/*****************************************************************************
*
* Name : _config_files_load
*
* Purpose : Load the content of each config file into resource_config[x]
*
*****************************************************************************/
int _config_files_load (void)
{
int i = 0 ;
/* Load each resource config file */
for ( string_iter_ptr = config_files.begin () ;
string_iter_ptr != config_files.end () ;
string_iter_ptr++ )
{
if ( i >= MAX_RESOURCES )
{
wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES );
break ;
}
/* Read the resource config file */
resource_config[i].mask = 0 ;
if (ini_parse( string_iter_ptr->data(), rmon_resource_config,
&resource_config[i]) < 0)
{
ilog("Read Failure : %s\n", string_iter_ptr->data() );
}
else
{
dlog ("Config File : %s\n", string_iter_ptr->c_str());
/* Init the timer for this resource */
mtcTimer_reset ( rtimer[i] ) ;
rtimer[i].service = resource_config[i].resource ;
resource_config[i].i = i ;
/* allow an existing alarm to be cleared if the first reading
after reboot is good
*/
resource_config[i].failed = false ;
resource_config[i].count = 0 ;
resource_config[i].resource_value = 0 ;
resource_config[i].resource_prev = 0 ;
resource_config[i].stage = RMON_STAGE__INIT ;
resource_config[i].sev = SEVERITY_CLEARED ;
resource_config[i].alarm_type = STANDARD_ALARM;
resource_config[i].failed_send = 0;
resource_config[i].alarm_raised = false;
/* add the alarm ids for the FM API per resource monitored */
if (strcmp(resource_config[i].resource, CPU_RESOURCE_NAME) == 0) {
/* platform cpu utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, CPU_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__CPU_USAGE ;
}
else if (strcmp(resource_config[i].resource, V_CPU_RESOURCE_NAME) == 0) {
/* vswitch cpu utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_CPU_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__CPU_USAGE ;
}
else if (strcmp(resource_config[i].resource, MEMORY_RESOURCE_NAME) == 0) {
/* platform memory utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, MEMORY_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__MEMORY_USAGE ;
}
else if (strcmp(resource_config[i].resource, V_MEMORY_RESOURCE_NAME) == 0) {
/* vswitch memory utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_MEMORY_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__MEMORY_USAGE ;
}
else if (strcmp(resource_config[i].resource, FS_RESOURCE_NAME) == 0) {
/* platform disk utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, FS_ALARM_ID);
resource_config[i].mounted = MOUNTED;
resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ;
}
else if (strcmp(resource_config[i].resource, INSTANCE_RESOURCE_NAME) == 0) {
/* instances filesystem utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, INSTANCE_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ;
}
else if (strcmp(resource_config[i].resource, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) {
/* platform virtual thin pool utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_CINDER_THINPOOL_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ;
}
else if (strcmp(resource_config[i].resource, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) {
/* platform virtual thin pool utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_NOVA_THINPOOL_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ;
}
else if (strcmp(resource_config[i].resource, V_PORT_RESOURCE_NAME) == 0) {
/* vswitch port utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH,
V_PORT_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__PORT ;
}
else if (!strcmp(resource_config[i].resource, V_INTERFACE_RESOURCE_NAME) ||
!strcmp(resource_config[i].resource, V_LACP_INTERFACE_RESOURCE_NAME)) {
/* vswitch interface(lacp or otherwise) utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH,
V_INTERFACE_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__INTERFACE ;
}
else if (!strcmp(resource_config[i].resource, V_OVSDB_RESOURCE_NAME)) {
/* vswitch OVSDB manager utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH,
V_OVSDB_MANAGER_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__DATABASE_USAGE ;
}
else if (!strcmp(resource_config[i].resource, V_OPENFLOW_RESOURCE_NAME)) {
/* vswitch Openflow utilization */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH,
V_OPENFLOW_CONTROLLER_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__NETWORK_USAGE ;
}
else if (strcmp(resource_config[i].resource, REMOTE_LOGGING_RESOURCE_NAME) == 0) {
/* remote logging connectivity */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH,
REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID);
resource_config[i].res_type = RESOURCE_TYPE__CONNECTIVITY ;
}
else
{
resource_config[i].res_type = RESOURCE_TYPE__UNKNOWN ;
}
ilog ("Monitoring %2d: %s (%s)\n",
i,
resource_config[i].resource,
resource_config[i].severity);
mem_log_resource ( &resource_config[i] );
i++;
}
}
_rmon_ctrl_ptr->resources = i ;
ilog ("Monitoring %d Resources\n", _rmon_ctrl_ptr->resources );
return (PASS);
}
/*****************************************************************************
*
* Name : _inter_config_load
*
* Purpose : Load the content of each config file into interface_resource_config[x]
*
*****************************************************************************/
int _inter_config_load (void)
{
int i = 0 ;
for ( string_iter_ptr = interface_config_files.begin () ;
string_iter_ptr != interface_config_files.end () ;
string_iter_ptr++ )
{
if ( i >= MAX_RESOURCES )
{
wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES );
break ;
}
/* Read the interface resource config file */
interface_resource_config[i].mask = 0 ;
if (ini_parse( string_iter_ptr->data(), rmon_interface_config,
&interface_resource_config[i]) < 0)
{
ilog("Read Failure : %s\n", string_iter_ptr->data() );
}
else
{
dlog ("Config File : %s\n", string_iter_ptr->c_str());
ilog ("Monitoring %2d: %s (%s)\n", i, interface_resource_config[i].resource ,
interface_resource_config[i].severity );
interface_resource_config[i].i = i ;
interface_resource_config[i].failed = false ;
interface_resource_config[i].stage = RMON_STAGE__INIT ;
interface_resource_config[i].sev = SEVERITY_CLEARED ;
interface_resource_config[i].failed_send = 0;
interface_resource_config[i].alarm_raised = false;
/* add the alarm ids for the FM API per resource monitored */
if (strcmp(interface_resource_config[i].resource, OAM_INTERFACE_NAME) == 0) {
/* add the alarm id for the FM API per resource monitored */
snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, OAM_ALARM_ID);
snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, OAM_PORT_ALARM_ID);
}
else if (strcmp(interface_resource_config[i].resource, MGMT_INTERFACE_NAME) == 0) {
/* add the alarm id for the FM API per resource monitored */
snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, MGMT_ALARM_ID);
snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, MGMT_PORT_ALARM_ID);
}
else if (strcmp(interface_resource_config[i].resource, INFRA_INTERFACE_NAME) == 0) {
/* add the alarm id for the FM API per resource monitored */
snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, INFRA_ALARM_ID);
snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, INFRA_PORT_ALARM_ID);
}
mem_log_interface_resource ( &interface_resource_config[i] );
i++;
}
}
_rmon_ctrl_ptr->interface_resources = i ;
ilog ("Monitoring %d Interface Resources\n", _rmon_ctrl_ptr->interface_resources );
return (PASS);
}
/*****************************************************************************
*
* Name : _thinmeta_config_load
*
* Purpose : Load the content of each config file into thinmeta_config[x]
*
*****************************************************************************/
int _thinmeta_config_load (void)
{
int i = 0 ;
/* Set hard-coded defaults for all structures */
for ( int j = 0; j < MAX_RESOURCES; j++)
{
thinmeta_resource_config_type * res;
res = &thinmeta_resource_config[i];
res->critical_threshold = THINMETA_DEFAULT_CRITICAL_THRESHOLD;
res->alarm_on = THINMETA_DEFAULT_ALARM_ON;
res->autoextend_on = THINMETA_DEFAULT_AUTOEXTEND_ON;
res->autoextend_by = THINMETA_DEFAULT_AUTOEXTEND_BY;
res->autoextend_percent = THINMETA_DEFAULT_AUTOEXTEND_PERCENT;
res->audit_period = THINMETA_DEFAULT_AUDIT_PERIOD;
}
/* Load resources */
for ( string_iter_ptr = config_files.begin () ;
string_iter_ptr != config_files.end () ;
string_iter_ptr++ )
{
if ( i >= MAX_RESOURCES )
{
wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES );
break ;
}
/* Read the resource config file */
if (ini_parse( string_iter_ptr->data(), rmon_thinmeta_config,
&thinmeta_resource_config[i]) < 0)
{
ilog("Read Failure : %s\n", string_iter_ptr->data() );
}
else
{
thinmeta_resource_config_type * res;
res = &thinmeta_resource_config[i];
if (!res->section_exists)
{
dlog3 ("Config File : %s does not have a [%s] section\n",
string_iter_ptr->c_str(), THINMETA_CONFIG_SECTION);
continue;
}
dlog ("Config File : %s\n", string_iter_ptr->c_str());
/* validate loaded configuration */
if (!res->vg_name || !res->thinpool_name)
{
elog("Invalid VG and/or thinpool names for thinpool metadata "
"in config file: %s, disabling monitoring", string_iter_ptr->c_str());
res->critical_threshold = RESOURCE_DISABLE;
res->vg_name = THINMETA_INVALID_NAME;
res->thinpool_name = THINMETA_INVALID_NAME;
}
else if (res->critical_threshold > 99)
{
elog("Metadata monitoring error in config file: %s. Option critical_threshold > 99%%, "
"value in config file: %i, disabling monitoring",
string_iter_ptr->c_str(), res->critical_threshold)
res->critical_threshold = 0;
}
else if (res->alarm_on > 1)
{
elog("Metadata monitoring error in config file: %s. Option alarm_on is NOT boolean, "
"value in config file: %i, disabling monitoring", string_iter_ptr->c_str(), res->alarm_on);
res->critical_threshold = RESOURCE_DISABLE;
}
else if (res->autoextend_on > 1)
{
elog("Metadata monitoring error in config file: %s. Option autoextend_on is NOT boolean, "
"value in config file: %i, disabling monitoring",
string_iter_ptr->c_str(), res->autoextend_on)
res->critical_threshold = RESOURCE_DISABLE;
}
else if (res->autoextend_percent > 1)
{
elog("Metadata monitoring error in config file: %s. Option autoextend_percent is NOT boolean, "
"value in config file: %i, disabling monitoring",
string_iter_ptr->c_str(), res->autoextend_percent)
res->critical_threshold = RESOURCE_DISABLE;
}
else if ((res->autoextend_percent && res->autoextend_by > 100) ||
(res->autoextend_on && res->autoextend_by < 1))
{
elog("Metadata monitoring error in config file: %s. Option autoextend_by not in [1,100] interval, "
"value in config file: %i, disabling monitoring",
string_iter_ptr->c_str(), res->autoextend_by)
res->critical_threshold = RESOURCE_DISABLE;
}
else if ((res->audit_period < 1) || (res->audit_period > 10000))
{
elog("Metadata monitoring error in config file: %s. Option audit_period not in [1,10000] interval, "
"value in config file: %i, disabling monitoring",
string_iter_ptr->c_str(), res->audit_period)
res->critical_threshold = RESOURCE_DISABLE;
}
ilog ("%s/%s pool metadata monitored; resource index: %2d\n", res->vg_name ,
res->thinpool_name, i );
i++;
}
}
_rmon_ctrl_ptr->thinmeta_resources = i ;
ilog ("Monitoring %d Thinpool Metadata Resources\n", _rmon_ctrl_ptr->thinmeta_resources );
return (PASS);
}
/*****************************************************************************
*
* Name : rmon_hdlr_init
*
* Purpose : Init the handler but also support re-init that might occur over a SIGHUP
*
*****************************************************************************/
#define RMON_TIMER_TYPE__EVENT "event"
#define RMON_TIMER_TYPE__PM "pm"
#define RMON_TIMER_TYPE__NTP "ntp"
#define RMON_TIMER_TYPE__RES "resource"
#define RMON_TIMER_TYPE__THIN "thinpool"
int rmon_hdlr_init ( rmon_ctrl_type * ctrl_ptr )
{
/* Save the control pointer */
_rmon_ctrl_ptr = ctrl_ptr ;
mtcTimer_init ( rmonTimer_event, LOCALHOST, RMON_TIMER_TYPE__EVENT) ;
mtcTimer_init ( rmonTimer_pm, LOCALHOST, RMON_TIMER_TYPE__PM ) ;
if (is_controller())
mtcTimer_init ( rmonTimer_ntp,LOCALHOST, RMON_TIMER_TYPE__NTP ) ;
for ( int i = 0 ; i < MAX_RESOURCES ; i++ )
mtcTimer_init ( rtimer[i], LOCALHOST, RMON_TIMER_TYPE__RES );
ctrl_ptr->resources = 0 ;
for ( int i = 0 ; i < MAX_RESOURCES ; i++ )
mtcTimer_init ( thinmetatimer[i], LOCALHOST, RMON_TIMER_TYPE__THIN );
ctrl_ptr->thinmeta_resources = 0 ;
/* Initialize the Resource Monitor Array */
memset ( (char*)&resource_config[0], 0, sizeof(resource_config_type)*MAX_RESOURCES);
memset ( (char*)&interface_resource_config[0], 0, sizeof(interface_resource_config_type)*MAX_RESOURCES);
memset ( (char*)&thinmeta_resource_config[0], 0, sizeof(thinmeta_resource_config_type)*MAX_RESOURCES);
memset ( (char*)&registered_clt[0], 0, sizeof(registered_clients)*MAX_CLIENTS);
/* Read in the list of config files and their contents */
load_filenames_in_dir ( CONFIG_DIR, config_files ) ;
/* Read in the list of interface config files and their contents */
load_filenames_in_dir ( INT_CONFIG_DIR, interface_config_files ) ;
_thinmeta_config_load();
_config_files_load ();
// _inter_config_load ();
/* init Thin Metadata Monitoring after config reload - including timers */
thinmeta_init(thinmeta_resource_config, thinmetatimer, ctrl_ptr->thinmeta_resources);
/* Log the control setting going into the main loop */
mem_log_ctrl ( _rmon_ctrl_ptr );
/* Initialize instance mount monitoring */
if (pthread_mutex_init(&lock, NULL) != 0)
{
elog("mutex init failed \n");
}
t_data.thread_running = false;
t_data.resource_usage = MOUNTED;
t_data.nr_switches_count = 0;
t_data.pid = getpid();
return (PASS) ;
}
/*****************************************************************************
*
* Name : _set_resource_usage
*
* Purpose : Restore the resource usage value from an existing alarm's reason text
*
*****************************************************************************/
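/* A minimal example of the parsing below (assumed reason text format):
 * a reason text ending in "... actual usage 91%" has "91%" as its last
 * space separated token; its leading digits give a resource_value of 91. */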
void _set_resource_usage ( string reason_text, resource_config_type * ptr )
{
size_t found;
string res_val;
size_t last_index;
string temp_val;
char resource_usage[10];
/* extract the resource value from the reason text */
found = reason_text.find_last_of( ' ' );
temp_val = reason_text.substr(found+1);
last_index = temp_val.find_first_not_of("0123456789");
res_val = temp_val.substr(0, last_index);
snprintf (resource_usage, sizeof(resource_usage), "%s", res_val.c_str());
sscanf(resource_usage, "%lf", &ptr->resource_value);
}
/*****************************************************************************
*
* Name : build_entity_instance_id
*
* Purpose : build the alarm's entity_instance_id based on the
* resource type and alarm type.
*
*****************************************************************************/
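/* Examples of the generated ids, assuming my_hostname is of the form
 * "host=controller-0" (see the NTP alarm handling later in this file):
 *   host=controller-0.filesystem=<mount point>  (static/dynamic filesystems)
 *   host=controller-0.volumegroup=<vg name>     (dynamic "lvg" resources)
 *   host=controller                             (cinder thin pool, host independent)
 */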
void build_entity_instance_id ( resource_config_type *ptr, char *entity_instance_id )
{
dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type);
// Make certain the id is cleared
entity_instance_id[0] = 0;
if ( ptr->alarm_type == DYNAMIC_ALARM )
{
if ((ptr->type != NULL) && (strcmp(ptr->type, "lvg") == 0 ))
{
/* This case covers volume groups */
/* Use host=<x>.volumegroup=type for id*/
snprintf((char*)entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.volumegroup=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource);
}
else
{
/* Use host=<x>.filesystem=type for id*/
snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource);
}
}
else if ( ptr->alarm_type == STATIC_ALARM )
{
/* Use host=<x>.filesystem=type for id*/
snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource);
}
else if ((ptr->alarm_type == STANDARD_ALARM) && (strstr(ptr->resource, V_MEMORY_RESOURCE_NAME) != NULL))
{
/* AVS memory */
snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.processor=%d", _rmon_ctrl_ptr->my_hostname, ptr->socket_id);
}
else if (strstr(ptr->resource, V_CINDER_THINPOOL_RESOURCE_NAME) != NULL)
{
/* Cinder thin pool alarm should not be raised against a specific host */
/* as the volumes are synced between controllers through drbd. */
/* Instead we use a common entity instance id for both controllers. */
snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "host=controller");
}
else
{
/* Use hostname for alarm */
snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", _rmon_ctrl_ptr->my_hostname);
}
dlog ("resource %s entity instance id: %s\n", ptr->resource, entity_instance_id);
return;
}
/*****************************************************************************
*
* Name : thinpool_virtual_space_usage_init
*
* Purpose : Determine if we should monitor virtual usage or not: no purpose
* in doing so if thin provisioning is not used.
*
* Params : index - the index of the virtual space resource
*
* Return : None.
*
*****************************************************************************/
void thinpool_virtual_space_usage_init(int index,
const char *poolName,
const char *poolOwner) {
if (!poolName or !poolOwner) {
slog ("No poolName or poolOwner provided");
return;
}
ilog("index = %d, poolName = %s, poolOwner = %s", index, poolName, poolOwner);
/* Buffer (and its size) for the output of the pool type query
command built below. */
char current_pool_type[BUFFER_SIZE];
const unsigned int buffer_size = BUFFER_SIZE;
/* The command for seeing if the pool type is thin. */
char lvm_thin_cmd[BUFFER_SIZE];
const char *thin_pool_expected_result = NULL;
MEMSET_ZERO(current_pool_type);
MEMSET_ZERO(lvm_thin_cmd);
if (strcmp(poolName, "nova-local-pool") == 0) {
const char *nova_thin_pool_expected_result = "thin-pool";
thin_pool_expected_result = nova_thin_pool_expected_result;
sprintf(lvm_thin_cmd, "lvs --segments | grep \"%s\" | awk '{print $5}'", poolName);
}
else if (strcmp(poolName, "cinder-volumes-pool") == 0) {
const char *cinder_thin_pool_expected_result = "thin";
thin_pool_expected_result = cinder_thin_pool_expected_result;
sprintf(lvm_thin_cmd, "cat /etc/cinder/cinder.conf | awk -F = '/^lvm_type.*=.*/ { print $2; }' | tail -n 1 | tr -d ' '");
}
else {
slog("Invalid pool name given.");
return;
}
/* Result code. */
int rc;
/* Execute the command. */
rc = execute_pipe_cmd(lvm_thin_cmd, current_pool_type, buffer_size);
/* If the command has been executed successfuly, continue. */
if (rc == PASS) {
/* If the pool type is not thin, disable the alarm for virtual
usage. */
ilog("%s current pool type is set to = %s", poolOwner, current_pool_type);
if(strcmp(current_pool_type, thin_pool_expected_result) != 0) {
resource_config[index].alarm_status = ALARM_OFF;
ilog("%s LVM Thinpool Usage alarm off: thin provisioning not used", poolOwner);
} else {
resource_config[index].alarm_status = ALARM_ON;
ilog("%s LVM Thinpool Usage alarm on: thin provisioning used", poolOwner);
}
} else {
resource_config[index].alarm_status = ALARM_OFF;
elog("%s LVM Thinpool monitoring state unknown ; alarm disabled (rc:%i)",
poolOwner, rc);
}
}
/*****************************************************************************
*
* Name : virtual_space_usage_init
*
* Purpose : Determine if we should monitor virtual usage or not: no purpose
* in doing so if thin provisioning is not used.
*
* Return : None.
*
*****************************************************************************/
void virtual_space_usage_init(const char* resource_name) {
ilog ("Initialize thin pools for resource %s\n", resource_name);
int index;
if ( get_resource_index( resource_name, &index ) == PASS ) {
if (strcmp(resource_name, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) {
thinpool_virtual_space_usage_init(index,"cinder-volumes-pool","Cinder");
} else if (strcmp(resource_name, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) {
thinpool_virtual_space_usage_init(index, "nova-local-pool","Nova");
}
}
else {
wlog ("failed get_resource_index for resource %s\n", resource_name);
}
}
/*****************************************************************************
*
* Name : rmon_alarming_init
*
* Purpose : Clears any previously raised rmon alarms if rmon is restarted
*
*****************************************************************************/
void rmon_alarming_init ( resource_config_type * ptr )
{
dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type);
AlarmFilter alarmFilter;
SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) calloc (1, sizeof (SFmAlarmDataT));
if (active_alarm == NULL)
{
elog("Failed to allocate memory for SFmAlarmDataT\n");
return;
}
build_entity_instance_id (ptr, alarmData.entity_instance_id);
snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", ptr->alarm_id);
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmData.entity_instance_id);
if (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK)
{
if (active_alarm != NULL) {
string reasonText(active_alarm->reason_text);
/* Set the resource severity */
ptr->failed = true;
ptr->alarm_raised = true;
ptr->count = ptr->num_tries;
if ( active_alarm->severity == FM_ALARM_SEVERITY_MINOR )
{
ptr->sev = SEVERITY_MINOR;
}
else if ( active_alarm->severity == FM_ALARM_SEVERITY_MAJOR )
{
ptr->sev = SEVERITY_MAJOR;
if ( ptr->res_type == RESOURCE_TYPE__FILESYSTEM_USAGE )
{
string err_res_name(ptr->resource);
_space_to_underscore(err_res_name);
/* clear host degrade for fs usage alarms */
snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), "%s %s:",
err_res_name.c_str(),
DEGRADE_CLEAR_MSG );
rmon_send_request ( ptr, _rmon_ctrl_ptr->clients );
}
}
else
{
ptr->sev = SEVERITY_CRITICAL;
}
resourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT );
if (strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) != 0)
{
/* Set the resource severity */
_set_resource_usage( reasonText, ptr );
ilog ("%s setting previously failed resource alarm id: %s entity_instance_id: %s usage: %0.2f\n",
ptr->resource, ptr->alarm_id, alarmFilter.entity_instance_id, ptr->resource_value);
}
else
{
ilog ("%s setting previously failed resource alarm id: %s entity_instance_id: %s\n",
ptr->resource, ptr->alarm_id, alarmFilter.entity_instance_id);
}
}
}
free(active_alarm);
}
/*****************************************************************************
*
* Name : send_clear_msg
*
* Purpose : Send a message to all registered clients to set the node to
* available (clear the degrade)
*
*****************************************************************************/
void send_clear_msg ( int index )
{
int count = 0;
AlarmFilter alarmFilter;
SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) calloc (1, sizeof (SFmAlarmDataT));
if (active_alarm == NULL)
{
elog("Failed to allocate memory for SFmAlarmDataT\n");
return;
}
string err_res_name(resource_config[index].resource);
_space_to_underscore(err_res_name);
snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", resource_config[index].alarm_id);
build_entity_instance_id (&resource_config[index], alarmData.entity_instance_id);
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmData.entity_instance_id);
/* Notify rmon clients of fault being cleared */
snprintf(resource_config[index].errorMsg, sizeof(resource_config[index].errorMsg),
"%s cleared_alarms_for_resource:", err_res_name.c_str());
/* check if there is an alarm first for this resource. If there is not then the node */
/* should not be in a degrade state */
EFmErrorT ret = fm_get_fault( &alarmFilter, active_alarm);
if ( (ret == FM_ERR_OK) && (active_alarm != NULL) )
{
while (( rmon_send_request ( &resource_config[index], _rmon_ctrl_ptr->clients ) != PASS ) && (count < 3 ))
{
wlog ("%s request send failed \n", resource_config[index].resource);
count++;
}
if (count > 2)
{
wlog ("%s request send failed, count:%d \n", resource_config[index].resource, count);
resource_config[index].failed_send++;
}
if ((resource_config[index].failed_send == MAX_FAIL_SEND) || (count < 3))
{
/* Reset the values to defaults */
swact_count = 0;
ilog("Setting resource: %s back to defaults \n", resource_config[index].resource);
resource_config[index].failed = false ;
resource_config[index].alarm_raised = false ;
resource_config[index].count = 0 ;
resource_config[index].sev = SEVERITY_CLEARED ;
resource_config[index].stage = RMON_STAGE__START ;
resource_config[index].failed_send = 0;
}
}
else //alarm not found or error
{
if (ret == FM_ERR_ENTITY_NOT_FOUND)
{
dlog ("Alarm not found for resource: %s entity_instance_id: %s \n", alarmFilter.alarm_id, alarmFilter.entity_instance_id);
}
else
{
wlog ("fm_get_fault failed for resource: %s entity_instance_id: %s err: %d\n", alarmFilter.alarm_id,
alarmFilter.entity_instance_id, ret);
}
if (active_alarm == NULL)
{
elog("fm_get_fault returned null active_alarm\n");
}
swact_count++;
if (swact_count == MAX_SWACT_COUNT)
{
/* Reset the values to defaults */
while (( rmon_send_request ( &resource_config[index], _rmon_ctrl_ptr->clients ) != PASS ) && (count < 3 ))
{
wlog ("%s request send failed \n", resource_config[index].resource);
count++;
}
swact_count = 0;
ilog("Setting resource: %s back to defaults \n", resource_config[index].resource);
resource_config[index].failed = false ;
resource_config[index].alarm_raised = false ;
resource_config[index].count = 0 ;
resource_config[index].sev = SEVERITY_CLEARED ;
resource_config[index].stage = RMON_STAGE__START ;
resource_config[index].failed_send = 0;
}
}
free(active_alarm);
}
/*****************************************************************************
*
* Name : read_fs_file
*
* Purpose : read the memory mapped dynamic file system file
*****************************************************************************/
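/* The file content is a ',' separated list of entries; each entry is expected
 * to carry the space separated fields (see add_dynamic_fs_resource):
 *   <resource> <state> <type> <device> [<mount point>]
 */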
void read_fs_file ( vector<string> & dynamic_resources )
{
FILE * pFile;
char buf[MAX_LEN];
int fd;
string delimiter = ",";
size_t pos;
string token;
struct stat fileInfo;
struct flock fl;
memset ((char *)&fileInfo, 0 , sizeof(fileInfo));
fl.l_whence = SEEK_SET;
fl.l_start = 0;
fl.l_len = 0;
fl.l_pid = getpid();
pFile = fopen (DYNAMIC_FS_FILE , "r");
if (pFile != NULL) {
fd = fileno(pFile);
if (fd == -1)
{
elog("Error getting a file descriptor for reading");
fclose(pFile);
return;
}
/* lock the file for reading */
fl.l_type = F_RDLCK;
fcntl(fd, F_SETLKW, &fl);
if (fstat(fd, &fileInfo) == -1)
{
elog("Error getting the file size");
fclose(pFile);
return;
}
char *map = static_cast<char*>( mmap(0, fileInfo.st_size, PROT_READ, MAP_SHARED, fd, 0));
if (map == MAP_FAILED)
{
elog("Error mmapping the file");
fclose(pFile);
return;
}
/* the mapping is not NUL terminated ; bound the string by the mapped size */
string str(map, fileInfo.st_size);
snprintf( buf, MAX_LEN, "%s", str.c_str());
/* free the mmapped memory */
if (munmap(map, fileInfo.st_size) == -1)
{
elog("Error un-mmapping the file");
}
/* unlock and close the file */
fl.l_type = F_UNLCK;
fcntl(fd, F_SETLK, &fl);
fclose(pFile);
while ((pos = str.find(delimiter)) != string::npos) {
/* separate the resources from the file */
token = str.substr(0, pos);
dynamic_resources.push_back(token);
dlog("reading resource %s \n", token.c_str());
str.erase(0, pos + delimiter.length());
}
}
}
/*****************************************************************************
*
* Name : add_dynamic_fs_resource
*
* Purpose : Add the dynamic file system resources
*****************************************************************************/
void add_dynamic_fs_resource ( bool send_response )
{
#ifdef WANT_FS_MONITORING
char resource[50];
char temp_resource[50];
char device [50];
char mount_point[50];
char temp_state[20];
char type [50];
char buf[200];
string criticality = "critical";
vector<string> resource_list;
int absolute_thresholds[3];
memset(absolute_thresholds, 0, sizeof(absolute_thresholds));
fs_index.clear();
fs_state.clear();
/* get a list of all the dynamic fs mounts */
read_fs_file(resource_list);
for(std::vector<string>::iterator it = resource_list.begin(); it != resource_list.end(); ++it)
{
string str = *it;
snprintf(buf, sizeof(buf), "%s", str.c_str());
// For resources without mounts the mount_point will be NULL
memset(&mount_point[0], 0, sizeof(mount_point));
sscanf(buf, "%49s %19s %49s %49s %49s", temp_resource, temp_state, type, device, mount_point);
string state(temp_state);
bool found = false;
if (mount_point[0] != '\0')
{
// for resources with mounts, the resource name is the mount value
snprintf(resource, sizeof(resource), "%s", mount_point);
}
else
{
// for resources without mounts, the resource name is the device value
snprintf(resource, sizeof(resource), "%s", device);
}
/* the dynamic file system is enabled, add it if need be */
for (int i=0; i<_rmon_ctrl_ptr->resources; i++)
{
if ( strcmp(resource, resource_config[i].resource) == 0)
{
dlog ("resource %s already exists, update the state to %s \n", resource, state.c_str());
/* resource already exists no need to add it again */
/* update the state, it may have changed */
fs_index.push_back(i);
fs_state.push_back(state);
found = true;
break;
}
}
if (!found) // new resource to monitor, lets add it
{
int enabled_resource = ALARM_OFF;
if (strcmp(temp_state,"enabled") == 0)
{
enabled_resource = ALARM_ON;
}
if (mount_point[0] != '\0')
{
save_fs_resource ( resource, criticality, enabled_resource, fs_percent, absolute_thresholds, DYNAMIC_ALARM, type, device, MOUNTED );
}
else
{
save_fs_resource ( resource, criticality, enabled_resource, fs_percent, absolute_thresholds, DYNAMIC_ALARM, type, device, NOT_MOUNTED );
}
if (enabled_resource == ALARM_ON) {
calculate_fs_usage( &resource_config[_rmon_ctrl_ptr->resources - 1] );
rmon_alarming_init( &resource_config[_rmon_ctrl_ptr->resources - 1] );
}
}
}
#endif
if (send_response)
{
#ifdef WANT_FS_MONITORING
ilog ("sending response to dynamic FS add, to the rmon client\n");
#else
ilog("dynamic filesystem monitoring moved to collectd\n");
#endif
/* let the rmon client know that we are done with the file */
rmon_resource_response(_rmon_ctrl_ptr->clients);
}
}
/*****************************************************************************
*
* Name : clear_alarm_for_resource
*
* Purpose : Clear the alarm of the resource passed in
*
*****************************************************************************/
void clear_alarm_for_resource ( resource_config_type * ptr )
{
dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type);
AlarmFilter alarmFilter;
build_entity_instance_id (ptr, alarmData.entity_instance_id);
snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", ptr->alarm_id);
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmData.entity_instance_id);
int ret = rmon_fm_clear(&alarmFilter);
if (ret == FM_ERR_OK)
{
ilog ("Cleared stale alarm %s for entity instance id: %s", alarmFilter.alarm_id, alarmFilter.entity_instance_id);
}
else if (ret == FM_ERR_ENTITY_NOT_FOUND)
{
dlog ("Stale alarm %s for entity instance id: %s was not found", alarmFilter.alarm_id, alarmFilter.entity_instance_id);
}
else
{
wlog ("Failed to clear stale alarm %s for entity instance id: %s error: %d", alarmFilter.alarm_id, alarmFilter.entity_instance_id, ret);
}
}
/*****************************************************************************
*
* Name : process_dynamic_fs_file
*
* Purpose : read the dynamic files directory and add the dynamic filesystem
* resources when the file is updated
*****************************************************************************/
void process_dynamic_fs_file()
{
int index = 0;
pthread_mutex_lock(&lock);
modifyingResources = true;
pthread_mutex_unlock(&lock);
add_dynamic_fs_resource(true);
pthread_mutex_lock(&lock);
modifyingResources = false;
pthread_mutex_unlock(&lock);
/* deal with changes of dynamic file system enabled state */
for (unsigned int i=0; i<fs_index.size(); i++)
{
index = fs_index.at(i);
if ( strcmp(fs_state.at(i).c_str(), "disable") == 0 )
{
/* resource has been disabled, stop alarming on it */
ilog("%s is no longer enabled\n", resource_config[index].resource);
if ( resource_config[index].failed == true )
{
resource_config[index].alarm_status = ALARM_OFF;
if ( _rmon_ctrl_ptr->clients > 0 )
{
//send a clear degrade node
send_clear_msg(index);
}
// we need to clear the resource's alarm if there was any set for this resource
clear_alarm_for_resource(&resource_config[index]);
}
else
{
/* There was no active alarm to clear */
ilog("Setting resource: %s back to defaults \n", resource_config[index].resource);
resource_config[index].alarm_status = ALARM_OFF;
resource_config[index].failed = false;
resource_config[index].alarm_raised = false;
resource_config[index].count = 0 ;
resource_config[index].sev = SEVERITY_CLEARED ;
resource_config[index].stage = RMON_STAGE__START ;
}
}
else if ( strcmp(fs_state.at(i).c_str(), "enabled") == 0 )
{
// resource has been enabled
if ( resource_config[index].alarm_status == ALARM_OFF )
{
/* Turn the resource checking back on if it was off */
resource_config[index].alarm_status = ALARM_ON;
//reset values
resource_config[index].failed = false;
resource_config[index].alarm_raised = false;
resource_config[index].count = 0 ;
resource_config[index].sev = SEVERITY_CLEARED ;
resource_config[index].stage = RMON_STAGE__START ;
rmon_alarming_init( &resource_config[index] );
ilog("%s is now enabled \n", resource_config[index].resource);
if (strcmp(resource_config[index].resource, CINDER_VOLUMES) == 0)
{
virtual_space_usage_init(V_CINDER_THINPOOL_RESOURCE_NAME);
}
if (strcmp(resource_config[index].resource, NOVA_LOCAL) == 0)
{
virtual_space_usage_init(V_NOVA_THINPOOL_RESOURCE_NAME);
}
}
else // alarm aready on (enabled)
{
ilog("%s is already enabled \n", resource_config[index].resource);
}
}
else
{
wlog("%s invalid dynamic file system state: %s \n", resource_config[index].resource, fs_state.at(i).c_str());
}
}
}
/*****************************************************************************
*
* Name : process_static_fs_file
*
* Purpose : Reads in the list of static file systems for monitoring
*
*****************************************************************************/
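/* Each line of STATIC_FS_FILE is expected to carry (see the sscanf below):
 *   <resource> <device> <type> <abs_thresh1> <abs_thresh2> <abs_thresh3>
 * The three absolute thresholds are ignored when fs_percent is PERCENT_USED. */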
void process_static_fs_file()
{
FILE * pFile;
vector<string> mounts;
char buf[MAX_LEN];
char resource[50];
char type[50];
char device[50];
bool found = false;
int enabled_resource = ALARM_ON;
string criticality = "critical";
int absolute_thresholds[3] = {0};
pFile = fopen (STATIC_FS_FILE , "r");
if (pFile != NULL) {
ifstream fin( STATIC_FS_FILE );
string line;
while( getline( fin, line )) {
/* process each line */
mounts.push_back(line);
}
fclose(pFile);
for(std::vector<string>::iterator it = mounts.begin(); it != mounts.end(); ++it)
{
string str = *it;
snprintf(buf, MAX_LEN, "%s", str.c_str());
sscanf(buf, "%49s %49s %49s %d %d %d", resource, device, type, &absolute_thresholds[0], &absolute_thresholds[1], &absolute_thresholds[2]);
if (!found)
{
if (fs_percent == PERCENT_USED)
{
/* do not use the absolute thresholds */
memset(absolute_thresholds, 0, sizeof(absolute_thresholds));
}
/* add the resource */
save_fs_resource ( resource, criticality, enabled_resource, fs_percent, absolute_thresholds, STATIC_ALARM, type, device, MOUNTED );
calculate_fs_usage( &resource_config[_rmon_ctrl_ptr->resources - 1] );
}
}
}
else
{
elog("Error, no static file system file present at: %s\n", STATIC_FS_FILE);
}
}
/*****************************************************************************
*
* Name : rmon_timer_handler
*
* Purpose : Looks up the timer ID and asserts the corresponding ringer
*
*****************************************************************************/
void rmon_timer_handler ( int sig, siginfo_t *si, void *uc)
{
timer_t * tid_ptr = (void**)si->si_value.sival_ptr ;
/* Avoid compiler errors/warnings for parms we must
* have but currently do nothing with */
UNUSED(sig);
UNUSED(uc);
if ( !(*tid_ptr) )
{
// tlog ("Called with a NULL Timer ID\n");
return ;
}
/* is event rmon timer */
if ( *tid_ptr == rmonTimer_event.tid )
{
mtcTimer_stop_int_safe ( rmonTimer_event);
rmonTimer_event.ring = true ;
}
else if ( *tid_ptr == rmonTimer_pm.tid )
{
mtcTimer_stop_int_safe ( rmonTimer_pm);
rmonTimer_pm.ring = true ;
}
else if ( (is_controller()) && (*tid_ptr == rmonTimer_ntp.tid) )
{
mtcTimer_stop_int_safe ( rmonTimer_ntp);
rmonTimer_ntp.ring = true ;
}
else
{
bool found = false ;
for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ )
{
if ( *tid_ptr == rtimer[i].tid )
{
mtcTimer_stop_int_safe ( rtimer[i] );
rtimer[i].ring = true ;
found = true ;
break ;
}
}
if ( !found )
{
for ( int i = 0 ; i < _rmon_ctrl_ptr->thinmeta_resources ; i++ )
{
if ( *tid_ptr == thinmetatimer[i].tid )
{
mtcTimer_stop_int_safe ( thinmetatimer[i] );
thinmetatimer[i].ring = true ;
found = true ;
break ;
}
}
}
if ( !found )
{
/* try and cleanup by stopping this unknown timer via its tid */
mtcTimer_stop_tid_int_safe (tid_ptr);
}
}
}
/*****************************************************************************
*
* Name : clear_ntp_alarms
*
* Purpose : Loop through the current alarms and delete them if the server
* is now reachable or is no longer assigned to ntpq
*
*****************************************************************************/
void clear_ntp_alarms(std::list<string> &non_reachable_ntp_servers, unsigned int alarm_count, SFmAlarmDataT *active_alarms, bool clear_major_alarm)
{
dlog ("Total NTP alarm_count:%d", alarm_count);
AlarmFilter alarmFilter;
char alarm_to_search[FM_MAX_BUFFER_LENGTH];
fm_alarm_id alarm_id;
snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID);
// clear the major alarms if required
if (clear_major_alarm)
{
snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID );
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp", _rmon_ctrl_ptr->my_hostname);
int ret = rmon_fm_clear(&alarmFilter);
if (ret != FM_ERR_OK)
{
if (ret != FM_ERR_ENTITY_NOT_FOUND)
{
wlog ("Failed to clear major alarm %s for entity instance id:%s error:%d", NTP_ALARM_ID, alarmFilter.entity_instance_id, ret);
}
}
else
{
ilog ("Cleared major alarm %s for entity instance id:%s", NTP_ALARM_ID, alarmFilter.entity_instance_id);
}
}
if (active_alarms == NULL)
{
elog ("Null pointer for active_alarms");
return;
}
// clear minor alarms if required
bool found;
std::list<string>::iterator iter;
std::list<string>::iterator iter_bad_list;
// for each NTP alarms in the system see if it match any of the invalid NTP servers
// if it does not match then the alarm must be removed since that NTP server
// is no longer being monitored or is now valid
for ( unsigned int i = 0; i < alarm_count; i++ )
{
if ( ((active_alarms+i)->severity) == FM_ALARM_SEVERITY_MINOR )
{
// Verify that this NTP minor alarm is still valid, This server could no longer exist or is now marked
// reachable
dlog ("Verify NTP minor alarm is still valid, entity instance id:%s", (active_alarms+i)->entity_instance_id);
found = false;
// check for stale minor alarm
for ( iter = non_reachable_ntp_servers.begin (); iter != non_reachable_ntp_servers.end (); iter++ )
{
// e.g. host=controller-0.ntp=102.111.2.2
snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.ntp=%s", _rmon_ctrl_ptr->my_hostname, iter->c_str());
dlog ("Non reachable NTP server to search %s", iter->c_str());
if (strstr((active_alarms+i)->entity_instance_id, iter->c_str()) != NULL)
{
// server is in non reachable list, do not clear it
found = true;
dlog ("Alarm is still valid %s", iter->c_str());
break;
}
}
if (!found)
{
// lets clear it but only if it's this controller's alarm, it could be the peer controller's alarm
if (strstr((active_alarms+i)->entity_instance_id, _rmon_ctrl_ptr->my_hostname) != NULL)
{
snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID);
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", (active_alarms+i)->entity_instance_id);
if (rmon_fm_clear(&alarmFilter) != FM_ERR_OK)
{
wlog ("Failed to clear minor alarm %s for entity instance id:%s", NTP_ALARM_ID, (active_alarms+i)->entity_instance_id);
}
else
{
ilog ("Cleared minor alarm %s for entity instance id:%s", NTP_ALARM_ID, (active_alarms+i)->entity_instance_id);
}
}
}
}
}
}
/*****************************************************************************
*
* Name : ntp_query_results
*
* Purpose : Analyze the return code from the query_ntp_servers.sh script.
* Create alarms for servers that are not reachable and clear alarms
* for servers that are now reachable
*
*****************************************************************************/
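/* Status codes distinguished below: NTP_OK, NTP_SOME_REACHABLE,
 * NTP_SOME_REACHABLE_NONE_SELECTED, NTP_NONE_REACHABLE and
 * NTP_NOT_PROVISIONED. */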
void ntp_query_results (int ntp_query_status )
{
dlog ("ntp_query_results ntp_query_status:%d", ntp_query_status);
std::list<string> non_reachable_ntp_servers;
// if no NTP servers are provisioned on the system, we still need to clear old NTP
// alarms if there are any. But we do not need to read the tmp server file.
if (ntp_query_status != NTP_NOT_PROVISIONED)
{
// read the temp file which contains a list of reachable and non reachable servers
// this file is the output from the query_ntp_servers.sh script
const char *server_info = "/tmp/ntpq_server_info";
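/* Assumed file layout (produced by query_ntp_servers.sh):
 *   line 1 - ';' separated list of reachable servers
 *   line 2 - ';' separated list of non reachable servers */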
FILE *pFile;
pFile = fopen(server_info, "r");
if (pFile != NULL)
{
const char * delim = ";\n\r";
char * ip;
char line[500];
int pos = 0;
while ( memset(line, 0, sizeof(line)) && (fgets((char*) &line, sizeof(line), pFile) != NULL) )
{
// the first line in the tmp file is the reachable servers, the second is the non reachable servers
if (pos == 1)
{
for (ip = strtok (line, delim); ip; ip = strtok (NULL, delim))
{
non_reachable_ntp_servers.push_back(ip);
dlog("Found non reachable NTP servers:%s\n", ip);
}
break;
}
pos++;
}
fclose(pFile);
}
else
{
elog("Failed to open file: %s\n", server_info);
return;
}
}
// retrieve all the current NTP alarms
int rc;
unsigned int max_alarms=75;
fm_alarm_id alarm_id;
snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID);
SFmAlarmDataT *active_alarms = (SFmAlarmDataT*) calloc (max_alarms, sizeof (SFmAlarmDataT));
if (active_alarms == NULL)
{
elog ("Failed to allocate memory for NTP alarms");
return;
}
int ret = fm_get_faults_by_id( &alarm_id, active_alarms, &max_alarms);
if (!(ret == FM_ERR_OK || ret == FM_ERR_ENTITY_NOT_FOUND))
{
elog ("fm_get_faults_by_id failed trying to retreive all the NTP alarms, error:%d", ret);
free(active_alarms);
return;
}
// Clear alarms if required
bool clear_major_alarm = false;
bool created_major_alarm = false;
if ( ntp_query_status == NTP_NOT_PROVISIONED || ntp_query_status == NTP_SOME_REACHABLE || ntp_query_status == NTP_OK )
{
// We are going to clear the major alarm since there is at least one server selected or
// no servers are provisioned
clear_major_alarm = true;
}
// fm_get_faults_by_id returns the number of alarms found
if (max_alarms != 0)
{
// verify if alarms need to cleared and clear them
clear_ntp_alarms(non_reachable_ntp_servers, max_alarms, active_alarms, clear_major_alarm);
}
// There are no NTP servers provisioned so there are no alarms to raise
if (ntp_query_status == NTP_NOT_PROVISIONED)
{
free(active_alarms);
return;
}
// Raise alarms if required
// Set up alarms data
AlarmFilter alarmFilter;
snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), "Monitor and if condition persists, contact next level of support.");
snprintf(alarmData.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID);
strcpy(alarmData.uuid, "");
snprintf(alarmData.entity_type_id, FM_MAX_BUFFER_LENGTH, "ntp");
alarmData.alarm_state = FM_ALARM_STATE_SET;
alarmData.alarm_type = FM_ALARM_COMM;
alarmData.probable_cause = FM_ALARM_CAUSE_UNKNOWN;
alarmData.timestamp = 0;
alarmData.service_affecting = FM_FALSE;
alarmData.suppression = FM_FALSE;
// Here we raise the major alarm if required
if (ntp_query_status == NTP_NONE_REACHABLE || ntp_query_status == NTP_SOME_REACHABLE_NONE_SELECTED)
{
wlog("NTP configuration does not contain any valid or reachable NTP servers");
// Check if alarm is raised already
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp", _rmon_ctrl_ptr->my_hostname);
bool found = false;
for ( unsigned int i = 0; i < max_alarms; i++ )
{
if ( strncmp((active_alarms+i)->entity_instance_id, alarmFilter.entity_instance_id, sizeof((active_alarms+i)->entity_instance_id)) == 0 )
{
// Alarm already exist
dlog("Alarm %s already raised for entity instance id:%s\n", NTP_ALARM_ID, alarmFilter.entity_instance_id);
found = true;
break;
}
}
// Alarm does not exist so raise it
if (!found && !created_major_alarm)
{
// Alarm does not exist so raise it
alarmData.severity = FM_ALARM_SEVERITY_MAJOR;
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), "NTP configuration does not contain any valid or reachable NTP servers.");
snprintf(alarmData.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmFilter.entity_instance_id);
rc = rmon_fm_set(&alarmData, NULL);
if (rc == FM_ERR_OK )
{
ilog("Alarm %s created for entity instance id:%s \n", NTP_ALARM_ID, alarmData.entity_instance_id);
created_major_alarm = true;
}
else
{
ilog("Failed to create alarm %s for entity instance id:%s error: %d \n", NTP_ALARM_ID, alarmData.entity_instance_id, (int)rc);
}
}
}
// Here we raise alarms for individual servers
if (ntp_query_status != NTP_OK)
{
wlog("Some or all of the NTP servers are not reachable");
std::list<string>::iterator iter;
alarmData.severity = FM_ALARM_SEVERITY_MINOR;
// Loop through all the non reachable NTP servers
// Check to see if an alarm is already raised for the server.
// If we do not find an alarm for the server then we raise it
for ( iter = non_reachable_ntp_servers.begin (); iter != non_reachable_ntp_servers.end (); iter++ )
{
bool found = false;
// Build the alarm entity instance id
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp=%s", _rmon_ctrl_ptr->my_hostname, iter->c_str());
dlog("Search alarms for entity instance id:%s \n", alarmFilter.entity_instance_id);
for ( unsigned int i = 0; i < max_alarms; i++ )
{
if ( strncmp((active_alarms+i)->entity_instance_id, alarmFilter.entity_instance_id, sizeof((active_alarms+i)->entity_instance_id)) == 0 )
{
dlog("Alarm %s already raised for entity instance id:%s\n", NTP_ALARM_ID, alarmFilter.entity_instance_id);
found = true;
break;
}
}
// If the NTP alarm was not found then raise one for this NTP server
if (!found)
{
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), "NTP address %s is not a valid or a reachable NTP server.", iter->c_str() );
snprintf(alarmData.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmFilter.entity_instance_id);
rc = rmon_fm_set(&alarmData, NULL);
if (rc == FM_ERR_OK )
{
ilog("Alarm %s created for entity instance id:%s \n", NTP_ALARM_ID, alarmData.entity_instance_id);
}
else
{
ilog("Failed to create alarm %s for entity instance id:%s error:%d \n", NTP_ALARM_ID, alarmData.entity_instance_id, (int)rc);
}
}
}
}
free(active_alarms);
return;
}
/*****************************************************************************
*
* Name : query_ntp_servers
*
* Purpose : Execute the query_ntp_servers.sh script, which runs "ntpq -np"
* to query the health of the configured NTP servers. The script returns a
* status code and also creates a temporary file listing the reachable and
* non reachable NTP servers. That temp file is required to generate the
* proper alarms
*
*****************************************************************************/
int query_ntp_servers ( )
{
pid_t child_pid;
dlog ("Main Pid:%d \n", getpid() );
ntp_child_pid = child_pid = fork ();
if (child_pid == 0)
{
dlog ("Child Pid:%d \n", getpid() );
char* argv[] = {(char*)NTPQ_QUERY_SCRIPT, NULL};
char cmd[MAX_FILE_SIZE] ;
memset (cmd,0,MAX_FILE_SIZE);
snprintf ( &cmd[0], MAX_FILE_SIZE, "%s/%s", RMON_FILES_DIR, NTPQ_QUERY_SCRIPT );
bool close_file_descriptors = true ;
if ( setup_child ( close_file_descriptors ) != PASS )
{
exit(NTP_ERROR);
}
/* Restore default SIGCHLD handling in the child */
signal (SIGCHLD, SIG_DFL);
/* Setup the exec argument */
int res = execv(cmd, argv);
elog ( "Failed to run %s return code:%d error:%s\n", cmd, res, strerror(errno) );
exit (NTP_ERROR);
}
if ( child_pid == -1 )
{
elog ("Fork failed (%s)\n", strerror(errno));
/* TODO: Consider making this a critical fault
* after 100 retries.
* All possibilities based on man page are
* due to resource limitations and if that does
* not resolve in 100 retries then it probably never will.
**/
return (FAIL);
}
return (PASS);
}
/*****************************************************************************
*
* Name : rmonHdlr_ceilometer_handler
*
* Purpose : Handles the ceilometer sample create response message
*
*****************************************************************************/
void rmonHdlr_ceilometer_handler( struct evhttp_request *req, void *arg )
{
if ( !req )
{
elog (" Request Timeout\n");
ceilometerEvent.status = FAIL_TIMEOUT;
goto _ceilometer_handler_done ;
}
ceilometerEvent.status = rmonHttpUtil_status(ceilometerEvent);
if ( ceilometerEvent.status != PASS )
{
elog ("ceilometer HTTP request Failed (%d)\n", ceilometerEvent.status);
rmonHttpUtil_get_response(ceilometerEvent);
goto _ceilometer_handler_done ;
}
_ceilometer_handler_done:
event_base_loopbreak((struct event_base *)arg);
}
/*****************************************************************************
*
* Name : generate_ceilometer_pm
*
* Purpose : Generate ceilometer PMs through the REST API
*
*****************************************************************************/
void generate_ceilometer_pm ( string r_id, string m_id, string m_type,
string m_unit, string m_volume,
string m_metadata )
{
int rc = PASS;
daemon_config_type * cfg_ptr = daemon_get_cfg_ptr();
string command_path="";
string host_ip = cfg_ptr->keystone_auth_host;
int port = cfg_ptr->ceilometer_port;
int count = 0;
rmonHttpUtil_libEvent_init ( &ceilometerEvent, CEILOMETER_EVENT_SIG, host_ip, port);
ceilometerEvent.address.append("/v2/meters/");
ceilometerEvent.address.append(m_id);
ceilometerEvent.user_agent = "ceilometerclient.openstack.common.apiclient";
ceilometerEvent.payload = "[{";
ceilometerEvent.payload.append("\"resource_id\":\"");
ceilometerEvent.payload.append(r_id);
ceilometerEvent.payload.append("\",\"counter_name\":\"");
ceilometerEvent.payload.append(m_id);
ceilometerEvent.payload.append("\",\"counter_type\":\"");
ceilometerEvent.payload.append(m_type);
ceilometerEvent.payload.append("\",\"counter_unit\":\"");
ceilometerEvent.payload.append(m_unit);
ceilometerEvent.payload.append("\",\"counter_volume\":\"");
ceilometerEvent.payload.append(m_volume);
ceilometerEvent.payload.append("\",\"resource_metadata\":");
// the resource metadata is a dictionary of key-value pairs
ceilometerEvent.payload.append(m_metadata);
ceilometerEvent.payload.append("}]");
dlog ("Payload is : %s\n", ceilometerEvent.payload.c_str());
rc = rmonHttpUtil_api_request (CEILOMETER_SAMPLE_CREATE, ceilometerEvent, command_path);
do
{
if ( rc != PASS )
{
count++;
wlog ("ceilometer failed request (%d) ... retrying (%d)\n", rc, count);
}
rmonHttpUtil_log_event (ceilometerEvent);
} while ( ( rc!=PASS ) && ( count < REST_API_RETRY_COUNT ) );
if ( rc!= PASS )
{
elog ("ceilometer sample create Failed (%d) (cnt:%d)\n", rc, count);
}
}
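/*
 * Illustrative example (comment only, all argument values hypothetical):
 * a call such as
 *
 *     generate_ceilometer_pm ( "<resource-uuid>", "platform.cpu.util",
 *                              "gauge", "%", "12.5",
 *                              "{\"host\":\"controller-0\"}" );
 *
 * builds a JSON payload of the form
 *
 *     [{"resource_id":"<resource-uuid>","counter_name":"platform.cpu.util",
 *       "counter_type":"gauge","counter_unit":"%","counter_volume":"12.5",
 *       "resource_metadata":{"host":"controller-0"}}]
 *
 * and sends it to /v2/meters/<counter_name> on the configured ceilometer port.
 */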
void clear_rmon_api_counts ( registered_clients * ptr )
{
if ( ptr->b2b_miss_count > ptr->b2b_miss_peak )
{
ptr->b2b_miss_peak = ptr->b2b_miss_count ;
}
if ( ptr->mesg_err_cnt > ptr->mesg_err_peak )
{
ptr->mesg_err_peak = ptr->mesg_err_cnt ;
}
ptr->b2b_miss_count = 0 ;
ptr->send_err_cnt = 0 ;
ptr->recv_err_cnt = 0 ;
ptr->mesg_err_cnt = 0 ;
}
/*****************************************************************************
*
* Name : _space_to_underscore
*
* Purpose : Converts spaces in a string to underscores
* *****************************************************************************/
void _space_to_underscore (string & str )
{
char space = ' ';
for(unsigned int i = 0; i < str.size(); i++)
{
if(str[i] == space)
{
str[i] = '_';
}
}
}
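/*
 * Example: given str = "platform memory usage", _space_to_underscore(str)
 * rewrites it in place to "platform_memory_usage"; resource names are
 * normalized this way before being embedded in the errorMsg strings sent
 * to registered clients.
 */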
/*****************************************************************************
*
* Name : set_alarm_defaults
*
* Purpose : Set the defaults for the fm alarms
* *****************************************************************************/
void set_alarm_defaults ( resource_config_type * ptr )
{
strcpy(alarmData.uuid, "");
/* common data for all alarm messages */
snprintf(alarmData.entity_type_id, FM_MAX_BUFFER_LENGTH, "system.host");
build_entity_instance_id (ptr, alarmData.entity_instance_id);
alarmData.alarm_state = FM_ALARM_STATE_SET;
alarmData.alarm_type = FM_ALARM_OPERATIONAL;
alarmData.probable_cause = FM_ALARM_THRESHOLD_CROSSED;
alarmData.timestamp = 0;
alarmData.service_affecting = FM_FALSE;
alarmData.suppression = FM_TRUE;
snprintf(alarmData.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", ptr->alarm_id);
}
/*****************************************************************************
*
* Name : resource_handler
*
* Purpose : Handle the failed resources and raise alarms through
* the FM API as well as calling a function to notify registered clients
*****************************************************************************/
int resource_handler ( resource_config_type * ptr )
{
int rc = RETRY ;
AlarmFilter alarmFilter;
string err_res_name(ptr->resource);
_space_to_underscore(err_res_name);
if ( ptr->stage < RMON_STAGE__STAGES )
{
dlog2 ("%s %s Stage %d\n", ptr->resource, rmonStages_str[ptr->stage], ptr->stage );
}
else
{
resourceStageChange ( ptr, RMON_STAGE__FINISH );
}
switch ( ptr->stage )
{
case RMON_STAGE__START:
{
dlog ( "%s failed:%d set_cnt:%d debounce_cnt:%d\n",
ptr->resource,
ptr->failed,
ptr->count,
ptr->debounce_cnt);
break ;
}
case RMON_STAGE__MANAGE:
{
/* send messages to maintenance if thresholds are crossed */
if (ptr->alarm_status == ALARM_ON)
{
/* set up the fm api alarm defaults */
set_alarm_defaults( ptr );
if ( strcmp(ptr->resource, MEMORY_RESOURCE_NAME) == 0 )
{
snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action),
"Monitor and if condition persists, contact next level of support; may require additional memory on Host.");
}
else if ( strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) == 0 )
{
snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action),
"Check Management and Infrastructure Networks and Controller or Storage Nodes.");
}
else
{
if ((ptr->type != NULL) && (strcmp(ptr->type, "lvg") == 0 ))
{
snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action),
"Monitor and if condition persists, consider adding additional physical volumes to the volume group.");
}
else
{
snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action),
"Monitor and if condition persists, contact next level of support.");
}
}
if ( ptr->sev == SEVERITY_MINOR )
{
alarmData.severity = FM_ALARM_SEVERITY_MINOR;
if ( ptr->percent == PERCENT_USED ) {
if ( ptr->alarm_type == STANDARD_ALARM )
{
ilog ("%s threshold exceeded; threshold: %d%%, actual: %.2f%%. \n",
ptr->resource, ptr->minor_threshold, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"%s threshold exceeded; threshold: %u%%, actual: %.2f%%.",
ptr->resource, ptr->minor_threshold, ptr->resource_value);
}
else {
ilog ("Filesystem threshold exceeded; threshold: %d%%, actual: %.2f%%. \n",
ptr->minor_threshold, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"Filesystem exceeded; threshold: %u%%, actual: %.2f%%.",
ptr->minor_threshold, ptr->resource_value);
}
} else {
if ( ptr->alarm_type == STANDARD_ALARM )
{
ilog ("%s threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n",
ptr->resource, ptr->minor_threshold_abs_node0, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"%s threshold exceeded; threshold: %uMB, remaining value: %.2fMB.",
ptr->resource, ptr->minor_threshold_abs_node0, ptr->resource_value);
} else {
ilog ("Filesystem threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n",
ptr->minor_threshold_abs_node0, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"Filesystem threshold exceeded; threshold: %uMB, remaining value: %.2fMB.",
ptr->minor_threshold_abs_node0, ptr->resource_value);
}
}
snprintf(ptr->errorMsg, sizeof(ptr->errorMsg),
"%s minor_threshold_set", err_res_name.c_str());
}
else if ( ptr->sev == SEVERITY_MAJOR )
{
alarmData.severity = FM_ALARM_SEVERITY_MAJOR;
if (strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) != 0)
{
if (ptr->percent == PERCENT_USED){
if ( ptr->alarm_type == STANDARD_ALARM )
{
ilog ("%s threshold exceeded; threshold: %d%%, actual: %.2f%%. \n",
ptr->resource, ptr->major_threshold, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"%s threshold exceeded; threshold: %u%%, actual: %.2f%%.",
ptr->resource, ptr->major_threshold, ptr->resource_value);
}
else {
ilog ("Filesystem threshold exceeded; threshold: %d%%, actual: %.2f%%. \n",
ptr->major_threshold, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"Filesystem threshold exceeded; threshold: %u%%, actual: %.2f%%.",
ptr->major_threshold, ptr->resource_value);
}
} else {
if ( ptr->alarm_type == STANDARD_ALARM )
{
ilog ("%s threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n",
ptr->resource, ptr->major_threshold_abs_node0, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"%s threshold exceeded; threshold: %uMB, remaining value: %.2fMB.",
ptr->resource, ptr->major_threshold_abs_node0, ptr->resource_value);
} else {
ilog ("Filesystem threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n",
ptr->major_threshold_abs_node0, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"Filesystem threshold exceeded; threshold: %uMB, remaining value: %.2fMB.",
ptr->major_threshold_abs_node0, ptr->resource_value);
}
}
}
else if (strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) == 0)
{
/* instance alarming is a special case of alarm */
wlog ("No access to remote VM volumes.\n");
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"No access to remote VM volumes.");
}
if ( ptr->res_type == RESOURCE_TYPE__FILESYSTEM_USAGE )
{
snprintf(ptr->errorMsg, sizeof(ptr->errorMsg),
"%s %s",err_res_name.c_str(), DEGRADE_CLEAR_MSG );
}
else
{
snprintf(ptr->errorMsg, sizeof(ptr->errorMsg),
"%s major_threshold_set",err_res_name.c_str());
}
}
else if ( ptr->sev == SEVERITY_CRITICAL )
{
alarmData.severity = FM_ALARM_SEVERITY_CRITICAL;
if (ptr->percent == PERCENT_USED){
if ( ptr->alarm_type == STANDARD_ALARM )
{
ilog ("%s threshold exceeded; threshold: %d%%, actual: %.2f%%. \n",
ptr->resource, ptr->critical_threshold, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"%s threshold exceeded; threshold: %u%%, actual: %.2f%%.",
ptr->resource, ptr->critical_threshold, ptr->resource_value);
}
else {
ilog ("Filesystem threshold exceeded; threshold: %d%%, actual: %.2f%%. \n",
ptr->critical_threshold, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"Filesystem threshold exceeded; threshold: %u%%, actual: %.2f%%.",
ptr->critical_threshold, ptr->resource_value);
}
} else {
if ( ptr->alarm_type == STANDARD_ALARM )
{
ilog ("%s threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n",
ptr->resource, ptr->critical_threshold_abs_node0, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"%s threshold exceeded; threshold: %uMB, remaining value: %.2fMB.",
ptr->resource, ptr->critical_threshold_abs_node0, ptr->resource_value);
} else {
ilog ("Filesystem threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n",
ptr->critical_threshold_abs_node0, ptr->resource_value);
snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
"Filesystem threshold exceeded; threshold: %uMB, remaining value: %.2fMB.",
ptr->critical_threshold_abs_node0, ptr->resource_value);
}
}
snprintf(ptr->errorMsg, sizeof(ptr->errorMsg),
"%s major_threshold_set",err_res_name.c_str());
}
rc = rmon_fm_set(&alarmData, NULL);
if (rc == FM_ERR_OK ) {
ilog("%s: %s alarm\n",
ptr->resource,
FmAlarmSeverity_to_string(alarmData.severity).c_str());
ptr->alarm_raised = true;
} else {
ilog("%s: %s alarm failed (rc:%d)\n",
ptr->resource,
FmAlarmSeverity_to_string(alarmData.severity).c_str(),
(int)rc);
}
if (ptr->alarm_raised)
{
if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND))
{
/* If degrade debounce is non-zero then this
* alarm condition is candidate for host degrade */
if (ptr->debounce)
{
if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS )
{
ptr->failed_send++;
wlog ("%s request send failed (count:%d)\n",
ptr->resource,
ptr->failed_send );
}
else
{
ptr->failed_send = 0;
}
}
}
else
{
ptr->failed_send = 0;
}
resourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT );
}
}
else {
resourceStageChange ( ptr, RMON_STAGE__FINISH );
}
break;
}
case RMON_STAGE__IGNORE:
{
//nothing to do here, go to the finished stage
resourceStageChange ( ptr, RMON_STAGE__FINISH );
break ;
}
case RMON_STAGE__MONITOR_WAIT:
{
if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND) && (ptr->failed_send > 0))
{
if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS )
{
wlog ("%s request send failed \n", ptr->resource);
ptr->failed_send++;
}
else
{
ptr->failed_send = 0;
}
}
break;
}
case RMON_STAGE__FINISH:
{
if ((ptr->alarm_status == ALARM_ON) && (ptr->alarm_raised))
{
snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", ptr->alarm_id);
build_entity_instance_id (ptr, alarmData.entity_instance_id);
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmData.entity_instance_id);
ilog ("%s alarm clear\n", ptr->resource );
/* clear the alarm */
EFmErrorT ret = rmon_fm_clear(&alarmFilter);
if (( ret == FM_ERR_OK ) || ( ret == FM_ERR_ENTITY_NOT_FOUND ))
{
if (ret == FM_ERR_ENTITY_NOT_FOUND)
{
dlog ("%s alarm clear failed, entity '%s' not found",
ptr->resource, alarmData.entity_instance_id);
}
snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), "%s cleared_alarms_for_resource", err_res_name.c_str());
if ( (_rmon_ctrl_ptr->clients > 0) && ( ptr->failed_send < MAX_FAIL_SEND ) && (ret == FM_ERR_OK) )
{
while (( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) &&
( ptr->failed_send < MAX_FAIL_SEND ))
{
wlog ("%s request send failed \n", ptr->resource);
ptr->failed_send++;
}
ptr->alarm_raised = false;
ptr->failed_send = 0;
ptr->failed = false ;
ptr->count = 0 ;
ptr->sev = SEVERITY_CLEARED ;
ptr->stage = RMON_STAGE__START ;
}
else
{
ptr->alarm_raised = false;
ptr->failed_send = 0;
ptr->failed = false ;
ptr->count = 0 ;
ptr->sev = SEVERITY_CLEARED ;
ptr->stage = RMON_STAGE__START ;
}
}
else
{
wlog("%s alarm clear failed, entity '%s' (rc:%d)\n",
ptr->resource,
alarmData.entity_instance_id,
ret);
}
}
else
{
ptr->alarm_raised = false;
ptr->failed_send = 0;
ptr->failed = false ;
ptr->count = 0 ;
ptr->sev = SEVERITY_CLEARED ;
ptr->stage = RMON_STAGE__START ;
}
rc = PASS ;
break ;
}
default:
{
slog ("%s Invalid stage (%d)\n", ptr->resource, ptr->stage );
/* Default to finish for invalid case.
* If there is an issue then it will be detected */
resourceStageChange ( ptr, RMON_STAGE__FINISH );
}
}
return rc;
}
/*****************************************************************************
*
* Name : process_failures
*
* Purpose : Check whether a percentage resource is to be failed or a failure
* threshold is to be cleared by the resource_handler
*
*****************************************************************************/
void process_failures ( resource_config_type * ptr )
{
if (ptr->stage == RMON_STAGE__INIT)
{
/* first time after restart/reboot, clear the alarm if the first reading is good */
resourceStageChange ( ptr, RMON_STAGE__START );
if (ptr->resource_value < ptr->minor_threshold)
{
// assume an alarm may have been left raised before the restart
ptr->alarm_status = ALARM_ON;
ptr->alarm_raised = true;
ptr->failed = true;
ilog("%s Setting the state to FINISH\n", ptr->resource);
resourceStageChange ( ptr, RMON_STAGE__FINISH );
}
// Now we start counting as normal ...
}
else
{
if (ptr->failed)
{
/* If the resource is already failed, check to see if it is to be cleared */
if ((( ptr->sev == SEVERITY_MINOR) && ( ptr->resource_value < ptr->minor_threshold )) ||
(( ptr->sev == SEVERITY_MAJOR) && ( ptr->resource_value < ptr->major_threshold )) ||
(( ptr->sev == SEVERITY_CRITICAL) && ( ptr->resource_value < ptr->critical_threshold )))
{
if (ptr->count > ptr->num_tries)
ptr->count = ptr->num_tries;
if (ptr->count > 0)
ptr->count--;
if (ptr->count == 0) {
ptr->sev = SEVERITY_CLEARED;
ilog("%s Setting the state to FINISH\n", ptr->resource);
resourceStageChange ( ptr, RMON_STAGE__FINISH );
}
}
else
{
/* While in failed state, the resource usage must sustain normal level
* num_tries number of times before an alarm can be cleared. Keep incrementing the counter
* as it will be set to num_tries in the above block as soon as resource usage returns to
* normal level.*/
ptr->count++;
// rmon needs to send degrade assert message periodically as the
// condition might be cleared by maintenance over controller swact.
//
// added meaning to the debounce config setting.
// must be non-zero to degrade the host.
if ((ptr->alarm_raised) && (ptr->debounce) &&
(_rmon_ctrl_ptr->clients > 0))
{
if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS )
{
ptr->failed_send++ ;
wlog ("%s request send failed (count:%d)\n",
ptr->resource,
ptr->failed_send);
}
else
{
mlog ("%s rmon_send_request ok\n", ptr->resource );
ptr->failed_send = 0 ;
}
}
else
{
/* typical path for resources that
* - do not degrade host
* - do not raise alarms */
dlog ("%s: alarm:%d debounce:%d clients:%d\n",
ptr->resource,
(ptr->alarm_raised),
(ptr->debounce),
(_rmon_ctrl_ptr->clients));
}
}
}
}
/* Check to see if a resource is over the failure thresholds for: minor, major and critical failures */
if (( ptr->resource_value >= ptr->minor_threshold ) &&
( ptr->resource_value < ptr->major_threshold )
&& (ptr->sev != SEVERITY_MINOR))
{
ptr->count++;
if ( ptr->count >= ptr->num_tries) {
ptr->failed = true;
ptr->sev = SEVERITY_MINOR;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else if (( ptr->resource_value >= ptr->major_threshold ) &&
( ptr->resource_value < ptr->critical_threshold )
&& (ptr->sev != SEVERITY_MAJOR))
{
ptr->count++;
if ( ptr->count >= ptr->num_tries){
ptr->failed = true;
ptr->sev = SEVERITY_MAJOR;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else if (( ptr->resource_value >= ptr->critical_threshold )&&
(ptr->sev != SEVERITY_CRITICAL))
{
ptr->count++;
if (ptr->count >= ptr->num_tries){
ptr->failed = true;
ptr->sev = SEVERITY_CRITICAL;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else
{
/* if the host experienced a resource blip in the previous audit run and usage
* is now back at the normal level, decrement the count.*/
if ((!ptr->failed) && (ptr->count > 0)){
ptr->count--;
dlog("Resource %s is back at the normal level, count is set to %d", ptr->resource, ptr->count);
}
}
}
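/*
 * Worked example (illustrative, hypothetical numbers): with num_tries = 2 and
 * minor/major/critical thresholds of 80/90/95 (%), a resource reporting
 *
 *     79, 81, 82, 81, 79, 79
 *
 * behaves as follows: 79 leaves count at 0; 81 and 82 increment count to
 * num_tries, so the resource is marked failed at SEVERITY_MINOR and the
 * MANAGE stage raises the alarm; the next 81 (still above the minor
 * threshold) pushes count up again; the two 79 readings then cap count at
 * num_tries and decrement it back to 0, at which point the severity is
 * cleared and the FINISH stage clears the alarm.
 */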
/*****************************************************************************
*
* Name : process_failures_absolute
*
* Purpose : Check whether an absolute resource is to be failed or a
* failure threshold is to be cleared by the resource_handler
*
*****************************************************************************/
void process_failures_absolute ( resource_config_type * ptr )
{
int node = 0;
if (strcmp(ptr->resource,"processor_node1") == 0)
{
/* per node memory checking is enabled */
node = 1;
}
if (ptr->failed) {
/* If the resource is already failed, check to see if it is to be cleared */
if (node == 0) {
if ((( ptr->sev == SEVERITY_MINOR) && ( ptr->resource_value > ptr->minor_threshold_abs_node0 )) ||
(( ptr->sev == SEVERITY_MAJOR) && ( ptr->resource_value > ptr->major_threshold_abs_node0 )) ||
(( ptr->sev == SEVERITY_CRITICAL) && ( ptr->resource_value > ptr->critical_threshold_abs_node0 )))
{
if (ptr->count > ptr->num_tries)
ptr->count = ptr->num_tries;
if (ptr->count > 0)
ptr->count--;
if (ptr->count == 0) {
ptr->sev = SEVERITY_CLEARED;
resourceStageChange ( ptr, RMON_STAGE__FINISH );
}
}
else
{
/* While in failed state, the resource usage must sustain normal level
* num_tries number of times before an alarm can be cleared. Keep incrementing the counter
* as it will be set to num_tries in the above block as soon as resource usage returns to
* normal level.*/
ptr->count++;
}
}
else {
if ((( ptr->sev == SEVERITY_MINOR) && ( ptr->resource_value > ptr->minor_threshold_abs_node1 )) ||
(( ptr->sev == SEVERITY_MAJOR) && ( ptr->resource_value > ptr->major_threshold_abs_node1 )) ||
(( ptr->sev == SEVERITY_CRITICAL) && ( ptr->resource_value > ptr->critical_threshold_abs_node1 )))
{
if (ptr->count > ptr->num_tries)
ptr->count = ptr->num_tries;
if (ptr->count > 0)
ptr->count--;
if (ptr->count == 0) {
ptr->sev = SEVERITY_CLEARED;
resourceStageChange ( ptr, RMON_STAGE__FINISH );
}
}
else
{
/* While in failed state, the resource usage must sustain normal level
* num_tries number of times before an alarm can be cleared. Keep incrementing the counter
* as it will be set to num_tries in the above block as soon as resource usage returns to
* normal level.*/
ptr->count++;
}
}
}
if (node == 0) {
/* Check to see if a resource is over the failure thresholds for: minor, major and critical failures node 0 */
if (( ptr->resource_value <= ptr->minor_threshold_abs_node0 ) &&
( ptr->resource_value > ptr->major_threshold_abs_node0 ) &&
(ptr->sev != SEVERITY_MINOR))
{
ptr->count++;
if ( ptr->count >= ptr->num_tries){
ptr->failed = true;
ptr->sev = SEVERITY_MINOR;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else if (( ptr->resource_value <= ptr->major_threshold_abs_node0 ) &&
( ptr->resource_value > ptr->critical_threshold_abs_node0 ) &&
(ptr->sev != SEVERITY_MAJOR))
{
ptr->count++;
if ( ptr->count >= ptr->num_tries){
ptr->failed = true;
ptr->sev = SEVERITY_MAJOR;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else if (( ptr->resource_value < ptr->critical_threshold_abs_node0 )&&
(ptr->sev != SEVERITY_CRITICAL))
{
ptr->count++;
if (ptr->count >= ptr->num_tries){
ptr->failed = true;
ptr->sev = SEVERITY_CRITICAL;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else
{
/* if the host experienced a resource blip in the previous audit run and usage
* is now back at the normal level, decrement the count.*/
if ((!ptr->failed) && (ptr->count > 0)){
ptr->count--;
dlog("Resource %s is back at the normal level, count is set to %d", ptr->resource, ptr->count);
}
}
} else {
/* Check to see if a resource is over the failure thresholds for: minor, major and critical failures node 1 */
if (( ptr->resource_value <= ptr->minor_threshold_abs_node1 ) &&
( ptr->resource_value > ptr->major_threshold_abs_node1 ) &&
(ptr->sev != SEVERITY_MINOR))
{
ptr->count++;
if ( ptr->count >= ptr->num_tries){
ptr->failed = true;
ptr->sev = SEVERITY_MINOR;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else if (( ptr->resource_value <= ptr->major_threshold_abs_node1 ) &&
( ptr->resource_value > ptr->critical_threshold_abs_node1 ) &&
(ptr->sev != SEVERITY_MAJOR))
{
ptr->count++;
if ( ptr->count >= ptr->num_tries){
ptr->failed = true;
ptr->sev = SEVERITY_MAJOR;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else if (( ptr->resource_value < ptr->critical_threshold_abs_node1 )&&
(ptr->sev != SEVERITY_CRITICAL))
{
ptr->count++;
if (ptr->count >= ptr->num_tries){
ptr->failed = true;
ptr->sev = SEVERITY_CRITICAL;
resourceStageChange ( ptr, RMON_STAGE__MANAGE);
}
}
else
{
/* if the host experienced a resource blip in the previous audit run and usage
* is now back at the normal level, decrement the count.*/
if ((!ptr->failed) && (ptr->count > 0)){
ptr->count--;
dlog("Resource %s is back at the normal level, count is set to %d", ptr->resource, ptr->count);
}
}
}
}
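/*
 * Illustrative note: for absolute (MiB remaining) thresholds the comparisons
 * are inverted relative to process_failures(); a lower remaining value is
 * worse. For example (hypothetical values), with minor/major/critical
 * absolute thresholds of 400/200/100 MiB on node 0, a reading of 150 MiB
 * falls in the (100, 200] band and, after num_tries consecutive readings,
 * raises a SEVERITY_MAJOR alarm; readings must then stay above 200 MiB for
 * num_tries samples before the alarm is cleared.
 */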
void update_total_clients (int total_clients)
{
_rmon_ctrl_ptr->clients = total_clients;
}
void add_registered_client (registered_clients client)
{
registered_clt[_rmon_ctrl_ptr->clients] = client;
ilog("added registered client: %s \n", client.client_name);
}
/*****************************************************************************
*
* Name : add_fs_resource
*
* Purpose : Add a dynamic or static fs resource by reading
* the /etc/rmonfiles.d/dynamic.conf file
*****************************************************************************/
void add_fs_resource ( int resource_index, int criticality_index, int enabled,
int percent, int abs_values[3], int alarm_type,
int types_index, int devices_index, int mounted )
{
int fs_resource_index;
get_resource_index( FS_RESOURCE_NAME, &fs_resource_index );
int i = _rmon_ctrl_ptr->resources;
if (i >= MAX_RESOURCES) {
wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES );
}
else {
resource_config[i].resource = dynamic_resource.at(resource_index).c_str();
resource_config[i].severity = criticality_resource.at(criticality_index).c_str();
resource_config[i].type = types.at(types_index).c_str();
resource_config[i].device = devices.at(devices_index).c_str();
resource_config[i].critical_threshold = UNUSED_CRITICAL; // initialization
resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0;
resource_config[i].num_tries = DEFAULT_NUM_TRIES;
resource_config[i].alarm_status = enabled;
resource_config[i].percent = percent;
resource_config[i].mounted = mounted;
resource_config[i].alarm_type = alarm_type;
resource_config[i].debounce = resource_config[fs_resource_index].debounce;
// percentage based threshold measure
switch (percent) {
case PERCENT_USED:
if (abs_values[0] == 0) {
// if this is a static mounted file system resource
// then use common threshold values provided for the
// File System Resource
if ( (alarm_type == STATIC_ALARM) && (mounted == MOUNTED) ) {
resource_config[i].minor_threshold =
resource_config[fs_resource_index].minor_threshold;
resource_config[i].major_threshold =
resource_config[fs_resource_index].major_threshold;
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold =
resource_config[fs_resource_index].critical_threshold;
}
resource_config[i].num_tries =
resource_config[fs_resource_index].num_tries;
}
else {
/* There are no specific percent thresholds for
the dynamic resource, use defaults */
resource_config[i].minor_threshold = FS_MINOR;
resource_config[i].major_threshold = FS_MAJOR;
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold = FS_CRITICAL;
}
}
}
else if (abs_values[0] != 0) {
/* Specific percent thresholds are defined for the dynamic resource */
resource_config[i].minor_threshold = abs_values[0];
resource_config[i].major_threshold = abs_values[1];
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold = abs_values[2];
}
}
break;
case PERCENT_UNUSED:
if (abs_values[0] == 0) {
// if this is a static mounted file system then use common
// threshold values provided for the File System Resource
if ( (alarm_type == STATIC_ALARM) && (mounted == MOUNTED) ) {
resource_config[i].minor_threshold_abs_node0 =
resource_config[fs_resource_index].minor_threshold_abs_node0;
resource_config[i].major_threshold_abs_node0 =
resource_config[fs_resource_index].major_threshold_abs_node0;
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0;
}
resource_config[i].num_tries =
resource_config[fs_resource_index].num_tries;
}
else {
/* If the percent thresholds are selected
* use the default thresholds for the absolute
* value thresholds for the dynamic resource */
resource_config[i].minor_threshold_abs_node0 = DEFAULT_MINOR_ABS_NODE0;
resource_config[i].major_threshold_abs_node0 = DEFAULT_MAJOR_ABS_NODE0;
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0;
}
}
}
else if (abs_values[0] != 0) {
/* Specific absolute value thresholds are specified for the dynamic resource */
resource_config[i].minor_threshold_abs_node0 = abs_values[0];
resource_config[i].major_threshold_abs_node0 = abs_values[1];
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold_abs_node0 = abs_values[2];
}
}
break;
}
ilog ("Monitoring %2d: %-20s (%s) (%s)\n", i, resource_config[i].resource ,
resource_config[i].severity, (enabled ? "enabled" : "disabled") );
/* Init the timer for this resource */
mtcTimer_init ( rtimer[i] ) ;
rtimer[i].hostname = "localhost" ;
rtimer[i].service = resource_config[i].resource ;
resource_config[i].i = i;
resource_config[i].failed = false ;
resource_config[i].count = 0 ;
resource_config[i].stage = RMON_STAGE__START ;
resource_config[i].sev = SEVERITY_CLEARED ;
resource_config[i].failed_send = 0;
resource_config[i].alarm_raised = false;
resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ;
/* add the alarm id for the FM API per resource monitored */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, FS_ALARM_ID);
mem_log_resource ( &resource_config[i] );
i++;
_rmon_ctrl_ptr->resources = i;
}
}
/*****************************************************************************
*
* Name       : save_fs_resource
*
* Purpose : Loops through resources and only adds a dynamic file system
* resource if it does not yet exist
******************************************************************************/
void save_fs_resource ( string resource_name, string criticality,
int enabled, int percent,
int abs_values[3], int alarm_type,
string type, string device, int mounted)
{
size_t resource_index;
size_t criticality_index;
size_t types_index;
size_t devices_index;
bool newResource = true;
for (int k=0; k< _rmon_ctrl_ptr->resources; k++) {
if (strcmp(resource_config[k].resource, resource_name.c_str()) == 0) {
newResource = false;
break;
}
}
if (newResource == true) {
dlog ("%s(%s) fs resource add in %s state\n", resource_name.c_str(),
criticality.c_str(), (enabled) ? "enabled" : "disabled");
dynamic_resource.push_back(resource_name);
resource_index = dynamic_resource.size() - 1;
/* add the criticality value to a vector for permanent storage */
criticality_resource.push_back(criticality);
criticality_index = criticality_resource.size() - 1;
types.push_back(type);
types_index = types.size() - 1;
devices.push_back(device);
devices_index = devices.size() - 1;
add_fs_resource ( resource_index, criticality_index, enabled, percent, abs_values, alarm_type, types_index, devices_index, mounted );
}
}
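/*
 * Illustrative call (comment only, all values hypothetical): registering a
 * mounted, percent based filesystem resource with default thresholds might
 * look like:
 *
 *     int abs_values[3] = { 0, 0, 0 } ;   // 0 => fall back to default thresholds
 *     save_fs_resource ( "/var/log", "critical",
 *                        ALARM_ON, PERCENT_USED, abs_values,
 *                        STATIC_ALARM, "fs", "/dev/sda4", MOUNTED );
 *
 * The resource is only added if no resource with the same name is already
 * being monitored; otherwise the call is a no-op.
 */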
/*****************************************************************************
*
* Name : add_dynamic_mem_resource
*
* Purpose : Add a dynamic memory resource at runtime based on the name and criticality.
* The resource can use either custom or default percent and absolute thresholds.
* *****************************************************************************/
int add_dynamic_mem_resource ( int resource_index, int criticality_index,
double r_value, int percent, int abs_values[3],
const char * alarm_id, int socket_id=0 )
{
int i = _rmon_ctrl_ptr->resources;
int new_index = i;
if (i >= MAX_RESOURCES) {
wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES );
}
else {
resource_config[i].resource = dynamic_resource.at(resource_index).c_str();
resource_config[i].severity = criticality_resource.at(criticality_index).c_str();
if ((percent == 1) && (abs_values[0] == 0)) {
/* There are no specific percent thresholds for the dynamic resource, use defaults */
resource_config[i].minor_threshold = DEFAULT_MINOR;
resource_config[i].major_threshold = DEFAULT_MAJOR;
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold = DEFAULT_CRITICAL;
} else {
resource_config[i].critical_threshold = UNUSED_CRITICAL;
}
}
else if ((percent == 1) && (abs_values[0] != 0)) {
/* Specific percent thresholds are defined for the dynamic resource */
resource_config[i].minor_threshold = abs_values[0];
resource_config[i].major_threshold = abs_values[1];
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold = abs_values[2];
} else {
resource_config[i].critical_threshold = UNUSED_CRITICAL;
}
}
if ((percent == 0) && (abs_values[0] == 0)) {
/* If the percent thresholds are selected use the default thresholds for the absolute
* value thresholds for the dynamic resource */
resource_config[i].minor_threshold_abs_node0 = DEFAULT_MINOR_ABS_NODE0;
resource_config[i].major_threshold_abs_node0 = DEFAULT_MAJOR_ABS_NODE0;
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0;
} else {
resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0;
}
resource_config[i].minor_threshold_abs_node1 = DEFAULT_MINOR_ABS_NODE1;
resource_config[i].major_threshold_abs_node1 = DEFAULT_MAJOR_ABS_NODE1;
resource_config[i].critical_threshold_abs_node1 = DEFAULT_CRITICAL_ABS_NODE1;
}
else if ((percent == 0) && (abs_values[0] != 0)) {
/* Specific absolute value thresholds are specified for the dynamic resource */
resource_config[i].minor_threshold_abs_node0 = abs_values[0];
resource_config[i].major_threshold_abs_node0 = abs_values[1];
if (_rmon_ctrl_ptr->rmon_critical_thr == 1) {
resource_config[i].critical_threshold_abs_node0 = abs_values[2];
} else {
resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0;
}
resource_config[i].minor_threshold_abs_node1 = DEFAULT_MINOR_ABS_NODE1;
resource_config[i].major_threshold_abs_node1 = DEFAULT_MAJOR_ABS_NODE1;
resource_config[i].critical_threshold_abs_node1 = DEFAULT_CRITICAL_ABS_NODE1;
}
resource_config[i].num_tries = DEFAULT_NUM_TRIES;
resource_config[i].alarm_status = DEFAULT_ALARM_STATUS;
resource_config[i].percent = percent;
ilog ("Monitoring %2d: Dynamic Resource- %s (%s)\n", i, resource_config[i].resource ,
resource_config[i].severity );
/* Init the timer for this resource */
mtcTimer_init ( rtimer[i] ) ;
rtimer[i].hostname = "localhost" ;
rtimer[i].service = resource_config[i].resource ;
resource_config[i].i = i;
resource_config[i].failed = false ;
resource_config[i].count = 0 ;
resource_config[i].resource_value = r_value ;
resource_config[i].resource_prev = r_value ;
resource_config[i].stage = RMON_STAGE__START ;
resource_config[i].sev = SEVERITY_CLEARED ;
resource_config[i].alarm_type = STANDARD_ALARM;
resource_config[i].failed_send = 0;
resource_config[i].alarm_raised = false;
resource_config[i].socket_id = socket_id;
/* add the alarm id for the FM API per resource monitored */
snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, "%s", alarm_id);
mem_log_resource ( &resource_config[i] );
i++;
_rmon_ctrl_ptr->resources = i;
}
return new_index;
}
/*****************************************************************************
*
* Name : save_dynamic_mem_resource
*
* Purpose : Loops through resources and only adds a memory resource if it does not yet
* exist
******************************************************************************/
int save_dynamic_mem_resource ( string resource_name, string criticality,
double r_value, int percent, int abs_values[3],
const char * alarm_id, int socket_id=0 )
{
size_t resource_index;
size_t criticality_index;
bool newResource = true;
int updated_index;
for (int k=0; k< _rmon_ctrl_ptr->resources; k++) {
if (strcmp(resource_config[k].resource, resource_name.c_str()) == 0) {
resource_config[k].resource_value=
resource_config[k].resource_prev = r_value;
updated_index = k;
newResource = false;
break;
}
}
if (newResource == true) {
dynamic_resource.push_back(resource_name);
resource_index = dynamic_resource.size() - 1;
/* add the criticality value to a vector for permanent storage */
criticality_resource.push_back(criticality);
criticality_index = criticality_resource.size() - 1;
updated_index = add_dynamic_mem_resource(resource_index, criticality_index,
r_value, percent, abs_values,
alarm_id, socket_id);
rmon_alarming_init( &resource_config[updated_index] );
resource_config[updated_index].resource_prev =
resource_config[updated_index].resource_value= r_value;
}
return updated_index;
}
/*****************************************************************************
*
* Name : calculate_fs_usage
*
* Purpose : Calculate the file system usage as a percentage or an absolute value
* for the number of MiB remaining overall and in a specific fs. The calculation
* is done by executing the df command and getting the response for each type
* of filesystem being monitored.
*****************************************************************************/
void calculate_fs_usage ( resource_config_type * ptr )
{
dlog("%s, is mounted resource: %d is enabled: %d\n", ptr->resource, ptr->mounted, ptr->alarm_status);
FILE *pFile;
int last_index;
char fsLine[128];
char buf[200];
double fsUsage = 0;
char mounted_on[50], file_system[50], capacity[10];
unsigned long long size, used, available;
string res_val;
double cap_percent;
double MiB = 1024.0;
double free_units = 0;
double usage_percents = 0;
double total_units = 0;
if (ptr->mounted == MOUNTED)
{
if (strcmp(ptr->resource, FS_RESOURCE_NAME) == 0)
{
// We do not calculate the total for filesystem
// Resource FS_RESOURCE_NAME represents the total filesystem
return;
}
else
{
snprintf(buf, sizeof(buf), "timeout 2 df -T -P --local %s 2>/dev/null", ptr->resource);
}
/* convert output of "df -P" from KiB to MiB */
if(!(pFile = popen(buf, "r")))
{
elog("Error, command df is not executed on resource: %s\n", ptr->resource);
}
else
{
while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL))
{
sscanf(fsLine, "%49s %*s %llu %llu %llu %9s %49s", file_system, &size, &used, &available, capacity, mounted_on);
if (strcmp(mounted_on, ptr->resource) == 0)
{
string temp_val(capacity);
// exclude percentage (%) sign
last_index = temp_val.find_first_not_of("0123456789");
res_val = temp_val.substr(0, last_index);
snprintf(capacity, sizeof(capacity), "%s", res_val.c_str());
sscanf(capacity, "%lf", &cap_percent);
if (ptr->percent == PERCENT_USED)
{
fsUsage = cap_percent;
ptr->resource_value = fsUsage;
if ( log_value ( ptr->resource_value,
ptr->resource_prev,
DEFAULT_LOG_VALUE_STEP ) )
{
plog("filesystem: %s usage: %.2f%%\n",
ptr->resource, ptr->resource_value);
}
}
else
{
fsUsage = (double) (((100 - cap_percent) / 100) * size);
fsUsage = fsUsage / MiB;
ptr->resource_value = fsUsage;
if ( log_value ( ptr->resource_value,
ptr->resource_prev,
DEFAULT_LOG_VALUE_STEP ) )
{
plog("filesystem: %s has %f (MiB) (free)\n",
ptr->resource, ptr->resource_value);
}
}
// The size of the file system is 2X the user specified size to allow upgrades.
// Currently we are alarming on the used size but instead the alarming should be based on used size /2.
// As a result there is no indication to the user that they may have eaten into the reserved space
// for upgrades, resulting in an aborted upgrade.
if (strcmp(mounted_on, POSTGRESQL_FS_PATH) == 0)
{
ptr->resource_value = ptr->resource_value / 2;
}
}
}
}
pclose(pFile);
}
else if(strcmp(ptr->resource, NOVA_LOCAL) == 0)
{
/*rmon queries the thin pool usage if the volume group is nova-local*/
snprintf(buf, sizeof(buf), "timeout 2 lvdisplay -C --noheadings --nosuffix -o data_percent --units m "
"/dev/nova-local/nova-local-pool 2>/dev/null");
if(!(pFile = popen(buf, "r")))
{
elog("Error, command lvdisplay free units is not executed \n");
}
else
{
while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL))
{
usage_percents = atof(fsLine);
}
pclose(pFile);
}
ptr->resource_value = usage_percents;
if ( log_value ( ptr->resource_value,
ptr->resource_prev,
DEFAULT_LOG_VALUE_STEP ))
{
plog("filesystem: %s, usage: %f%% \n", ptr->resource, ptr->resource_value);
}
}
else if(strcmp(ptr->resource, CINDER_VOLUMES) == 0)
{
/*rmon queries the thin pool usage if the volume group is cinder-volumes*/
snprintf(buf, sizeof(buf), "timeout 2 lvdisplay -C --noheadings --nosuffix -o data_percent --units m "
"/dev/cinder-volumes/cinder-volumes-pool 2>/dev/null");
if(!(pFile = popen(buf, "r")))
{
elog("Error, command lvdisplay free units is not executed \n");
}
else
{
while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL))
{
usage_percents = atof(fsLine);
}
pclose(pFile);
}
ptr->resource_value = usage_percents;
if ( log_value ( ptr->resource_value,
ptr->resource_prev,
DEFAULT_LOG_VALUE_STEP ))
{
plog("filesystem: %s, usage: %.2f%% \n", ptr->resource, ptr->resource_value);
}
}
else
{
/* for the unmounted dynamic file system resources, use the vgdisplay command to get vg free units */
snprintf(buf, sizeof(buf), "timeout 2 vgdisplay -C --noheadings --nosuffix -o vg_free --units m %s 2>/dev/null", ptr->resource);
if(!(pFile = popen(buf, "r")))
{
elog("Error, command vgdisplay free units is not executed \n");
}
else
{
while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL))
{
free_units = atof(fsLine);
}
pclose(pFile);
}
/* for the unmounted dynamic file system resources, use the vgdisplay command to get vg size */
snprintf(buf, sizeof(buf), "timeout 2 vgdisplay -C --noheadings --nosuffix -o vg_size --units m %s 2>/dev/null", ptr->resource );
if(!(pFile = popen(buf, "r")))
{
elog("Error, command vgdisplay total units is not executed \n");
}
else
{
while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL))
{
total_units = atof(fsLine);
}
pclose(pFile);
}
if ( ptr->percent == PERCENT_USED )
{
if (total_units != 0)
{
ptr->resource_value = (double) (( (total_units - free_units) / total_units ) * 100);
}
else
{
ptr->resource_value = 0;
}
if ( log_value ( ptr->resource_value,
ptr->resource_prev,
DEFAULT_LOG_VALUE_STEP ))
{
plog("volume-group: %s, usage: %.2f%%\n", ptr->resource, ptr->resource_value);
}
}
else
{
ptr->resource_value = free_units;
if ( log_value ( ptr->resource_value,
ptr->resource_prev,
DEFAULT_LOG_VALUE_STEP ))
{
plog("volume-group: %s, %.2f (MiB) free\n", ptr->resource, ptr->resource_value);
}
}
}
}
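/*
 * Worked example (illustrative, hypothetical numbers): for a mounted resource
 * such as /var/log the df command above might return a line like
 *
 *     /dev/sda4 ext4 10190136 5095068 5095068 50% /var/log
 *
 * The sscanf() picks out file_system=/dev/sda4, size=10190136 (1024-byte
 * blocks), capacity="50%" and mounted_on=/var/log. With PERCENT_USED the
 * resource value is simply 50.0; otherwise the free space is derived from
 * the rounded capacity figure: ((100 - 50) / 100) * 10190136 KiB = 5095068 KiB,
 * divided by 1024 to give roughly 4975.65 MiB remaining.
 */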
/*****************************************************************************
*
* Name       : init_memory_accounting
*
* Purpose    : Get the memory accounting mode in use: either 0 (overcommit) or 1 (strict)
*****************************************************************************/
void init_memory_accounting()
{
const char *strict_memory_file = "/proc/sys/vm/overcommit_memory";
ifstream mem_file ( strict_memory_file );
string strict_line;
if (mem_file.is_open())
{
while ( getline (mem_file, strict_line) ) {
IS_STRICT = atoi(strict_line.c_str());
}
mem_file.close();
}
}
/*****************************************************************************
*
* Name : thinpool_calcVirtUsage
*
* Purpose : Obtain the percentage of the used virtual space in thin
* provisioning.
*
* Params : index - the index of the monitored resource (virtual space)
*
* Return : PASS/FAIL
*
*****************************************************************************/
int thinpool_calcVirtUsage(int index,
const char *poolName,
const char *poolOwner,
const char *allocParam) {
/* Initialize the variables used in calculating the virtual usage. */
double provisioned_capacity = 0;
double total_capacity = 0;
double allocation_ratio = 1;
double ratio = 0;
double MiB = 1024.0;
/* Buffer (and its size) for keeping the initial result after executing
the above commands. */
char result[BUFFER_SIZE];
const unsigned int buffer_size = BUFFER_SIZE;
/* Return code. */
int rc;
/* Save the necessary commands for obtaining the information about virtual
thin pool usage: provisioned capacity, total capacity and maximum
oversubscription ratio. */
const char *provisioned_capacity_cmd = NULL;
const char *allocation_ratio_cmd = NULL;
char total_capacity_cmd[BUFFER_SIZE];
snprintf(total_capacity_cmd, sizeof(total_capacity_cmd),
"lvs --units m --segments | grep \"%s\" | awk '{print $6}' | sed '$s/.$//'",
poolName);
if (strcmp (poolOwner, "Cinder") == 0) {
const char *cinder_provisioned_capacity_cmd ="lvs --units m | grep \"volume-[.]*\" | awk '{ sum+=$4} END {print sum}'";
const char *cinder_allocation_ratio_cmd = "cat /etc/cinder/cinder.conf | grep \"^max_over_subscription_ratio\" | cut -d '=' -f 2";
provisioned_capacity_cmd = cinder_provisioned_capacity_cmd;
allocation_ratio_cmd = cinder_allocation_ratio_cmd;
} else if (strcmp (poolOwner, "Nova") == 0) {
const char *nova_provisioned_capacity_cmd = "lvs --units m | grep \"[.]*_disk\" | awk '{ sum+=$4} END {print sum}'";
provisioned_capacity_cmd = nova_provisioned_capacity_cmd;
}
/* Determine the provisioned capacity. */
rc = execute_pipe_cmd(provisioned_capacity_cmd, result, buffer_size);
if (rc != PASS) {
wlog("%s LVM Thinpool ; unable to query provisioned capacity (rc:%i)",
poolOwner, rc);
return (FAIL);
}
provisioned_capacity = atof(result);
dlog("%s LVM Thinpool provisioned capacity is %f", poolOwner, provisioned_capacity);
/* If the threshold is of percentage type, then also determine the total
thin pool capacity and the max oversubscription ratio. */
rc = execute_pipe_cmd(total_capacity_cmd, result, buffer_size);
if (rc != PASS) {
elog("%s LVM Thinpool ; unable to query total capacity (rc:%i)",
poolOwner, rc);
return (FAIL);
}
total_capacity = atof(result);
dlog("%s LVM Thinpool total capacity is %f",
poolOwner, total_capacity);
if (strcmp (poolOwner, "Cinder") == 0) {
rc = execute_pipe_cmd(allocation_ratio_cmd, result, buffer_size);
if (rc != PASS) {
elog("%s LVM Thinpool %s ratio could not be determined (rc:%i)",
allocParam, poolOwner, rc);
return (FAIL);
}
allocation_ratio = atof(result);
} else if (strcmp (poolOwner, "Nova") == 0) {
allocation_ratio = 1.0;
}
dlog("%s LVM Thinpool %s is %f", poolOwner, allocParam, allocation_ratio);
/* If the allocation_ratio is 0 or hasn't been found, its default
value should be 1. */
if (allocation_ratio == 0)
allocation_ratio = 1;
/* Compute the current virtual space usage of the thin pool. */
if (total_capacity != 0){
ratio = provisioned_capacity / (total_capacity * allocation_ratio) * 100;
} else {
/*3 minutes (30 sec * rate_throttle = 180 sec)*/
/* Change the warning log to a debug log to avoid generating this log in
rmond.log when Cinder is Ceph backended. Once the repackaging of cinder_virtual_resource.conf
and nova_virtual_resource.conf is done, we will change it back to warning log. */
dlog("%s LVM Thinpool total capacity is 0\n", poolOwner);
return (FAIL);
}
/* Update the resource value configuration. */
if (resource_config[index].percent == 1) {
resource_config[index].resource_value = ratio;
if ( log_value ( resource_config[index].resource_value,
resource_config[index].resource_prev,
DEFAULT_LOG_VALUE_STEP ))
{
plog("%s LVM Thinpool Usage: %.2f%%", poolOwner, ratio);
}
}
else {
resource_config[index].resource_value =
((total_capacity * allocation_ratio) - provisioned_capacity) * MiB;
if ( log_value ( resource_config[index].resource_value,
resource_config[index].resource_prev,
DEFAULT_LOG_VALUE_STEP ))
{
plog("%s LVM Thinpool has %.2f (MiB) free",
poolOwner,
resource_config[index].resource_value);
}
}
return (PASS);
}
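/*
 * Worked example (illustrative, hypothetical numbers): for a percent based
 * resource, if the Cinder thin pool reports provisioned_capacity = 15000 MiB,
 * total_capacity = 10000 MiB and max_over_subscription_ratio = 2.0, then
 *
 *     ratio = 15000 / (10000 * 2.0) * 100 = 75%
 *
 * i.e. 75% of the oversubscribed virtual space is provisioned and that value
 * is stored as the resource value.
 */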
/*****************************************************************************
*
* Name : calculate_virtual_space_usage
*
* Purpose : Obtain the percentage of the used virtual space in thin
* provisioning.
*
* Params : index - the index of the monitored resource (virtual space)
*
* Return : PASS/FAIL
*
*****************************************************************************/
int calculate_virtual_space_usage(int index, const char* constant) {
int rc = 0;
if (strcmp(constant, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) {
rc = thinpool_calcVirtUsage(index,
"cinder-volumes-pool",
"Cinder",
"max_over_subscription_ratio");
} else if (strcmp(constant, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) {
rc = thinpool_calcVirtUsage(index,
"nova-local-pool",
"Nova",
"disk_allocation_ratio");
}
return rc;
}
/*****************************************************************************
*
* Name : calculate_memory_usage
*
* Purpose : Calculate the memory usage as a percentage or absolute value for the
* number of MiB left. The overall average memory usage as well as the per NUMA
* node memory usage is computed.
*****************************************************************************/
void calculate_memory_usage( int index ) {
const char *mem_info = "/proc/meminfo";
FILE *pFile;
char memoryLine[40];
char attribute_name[30];
double memUsage, memUsageHuge;
char *line0 = &memoryLine[0];
char *line3 = &memoryLine[3];
char *line10 = &memoryLine[10];
unsigned long int value;
unsigned long int avail = 0;
unsigned long int memTotal;
int resource_name_size = 100;
string resource_name_huge = "processor_hugepages_";
string resource_name = "processor_";
char numa_node[resource_name_size];
string criticality = "critical";
double MiB = 1024.0;
int absolute_thresholds[3];
memoryinfo memInfo;
struct dirent *ent;
DIR *numa_node_dir;
vector<string> numa_files;
vector<string> node_files;
memset ( (char*)&memInfo, 0, sizeof(memoryinfo));
if ((pFile = fopen(mem_info, "r")) == NULL){
dlog("failed to open: /proc/meminfo \n");
}
else {
while (memset(memoryLine, 0, sizeof(memoryLine)) && (fgets((char*) &memoryLine, sizeof(memoryLine), pFile) != NULL)) {
if (*line3 == 'T') {
/* match MemTotal */
value = 0UL;
if (sscanf(memoryLine, "MemTotal: %lu", &value) == 1) {
memInfo.MemTotal = value;
continue;
}
} else if (*line3 == 'F') {
/* match MemFree */
value = 0UL;
if (sscanf(memoryLine, "MemFree: %lu", &value) == 1) {
memInfo.MemFree = value;
continue;
}
} else if (*line3 == 'f') {
/* match Buffers */
value = 0UL;
if (sscanf(memoryLine, "Buffers: %lu", &value) == 1) {
memInfo.Buffers = value;
continue;
}
} else if (*line3 == 'h') {
/* match Cached */
value = 0UL;
if (sscanf(memoryLine, "Cached: %lu", &value) == 1) {
memInfo.Cached = value;
continue;
}
} else if ((*line0 == 'S') && (*line3 == 'c')) {
/* match Slab Reclaimable */
value = 0UL;
if (sscanf(memoryLine, "SReclaimable: %lu", &value) == 1) {
memInfo.SlabReclaimable = value;
continue;
}
} else if ((*line0 == 'C') && (*line10 == 't')) {
/* match CommitLimit */
value = 0UL;
if (sscanf(memoryLine, "CommitLimit: %lu", &value) == 1) {
memInfo.CommitLimit = value;
continue;
}
} else if ((*line0 == 'C') && (*line10 == 'A')) {
/* match Committed_AS */
value = 0UL;
if (sscanf(memoryLine, "Committed_AS: %lu", &value) == 1) {
memInfo.Committed_AS = value;
continue;
}
} else if ((*line0 == 'H') && (*line10 == 'T')) {
/* match Hugepages_Total */
value = 0UL;
if (sscanf(memoryLine, "HugePages_Total: %lu", &value) == 1) {
memInfo.HugePages_Total = value;
continue;
}
}
else if ((*line0 == 'H') && (*line10 == 'z')) {
/* match Hugepagesize */
value = 0UL;
if (sscanf(memoryLine, "Hugepagesize: %lu", &value) == 1) {
memInfo.Hugepagesize = value;
continue;
}
}
else if ((*line0 == 'A') && (*line3 == 'n')) {
/* match AnonPages */
value = 0UL;
if (sscanf(memoryLine, "AnonPages: %lu", &value) == 1) {
memInfo.AnonPages = value;
continue;
}
}
}
fclose(pFile);
}
avail = memInfo.MemFree + memInfo.Buffers + memInfo.Cached + memInfo.SlabReclaimable;
memTotal = avail + memInfo.AnonPages;
dlog("memTotal: %lu\n", memTotal);
/* average memory utilization */
if (IS_STRICT == 1) {
/* strict memory checking enabled */
if (resource_config[index].percent == 1) {
memUsage = (double) memInfo.Committed_AS / memInfo.CommitLimit;
memUsage = memUsage * 100;
} else {
memUsage = (double) (memInfo.CommitLimit - memInfo.Committed_AS) / MiB;
}
} else {
if (resource_config[index].percent == 1)
{
memUsage = (double) memInfo.AnonPages / memTotal;
memUsage = memUsage * 100;
} else
{
memUsage = (double) avail / MiB;
}
}
resource_config[index].resource_value = memUsage;
if (resource_config[index].percent == 1)
{
if ( log_value ( resource_config[index].resource_value,
resource_config[index].resource_prev,
DEFAULT_LOG_VALUE_STEP ))
{
plog("%s: %.2f%%\n",
resource_config[index].resource, memUsage);
}
}
else
{
if ( log_value ( resource_config[index].resource_value,
resource_config[index].resource_prev,
DEFAULT_LOG_VALUE_STEP ))
{
plog("%s: %.2f (MiB) free\n",
resource_config[index].resource, memUsage);
}
}
if ((numa_node_dir= opendir ("/sys/devices/system/node/")) != NULL) {
/* print all the files and directories within directory */
while ((ent = readdir (numa_node_dir)) != NULL) {
if (strstr(ent->d_name, "node") != NULL) {
numa_files.push_back(ent->d_name);
}
}
closedir (numa_node_dir);
}
/* loop through all NUMA nodes to get memory usage per NUMA node */
for (unsigned int p=0; p<numa_files.size(); p++) {
snprintf(numa_node, resource_name_size, "/sys/devices/system/node/%s/meminfo",numa_files.at(p).c_str());
resource_name += numa_files.at(p);
resource_name_huge += numa_files.at(p);
pFile = fopen (numa_node, "r");
while (memset(memoryLine, 0, sizeof(memoryLine)) && (fgets((char*) &memoryLine, sizeof(memoryLine), pFile) != NULL)) {
sscanf(memoryLine, "Node %*d %29s %lu", attribute_name, &value);
if (strstr(attribute_name, "MemTotal") != NULL) {
/* match MemTotal */
memInfo.MemTotal = value;
} else if (strstr(attribute_name, "MemFree") != NULL) {
/* match MemFree */
memInfo.MemFree = value;
} else if (strstr(attribute_name, "FilePages") != NULL) {
/* match FilePages */
memInfo.FilePages = value;
} else if (strstr(attribute_name, "SReclaimable") != NULL) {
/* match SReclaimable */
memInfo.SlabReclaimable = value;
} else if (strstr(attribute_name, "HugePages_Total") != NULL) {
/* match HugePages_Total */
memInfo.HugePages_Total = value;
} else if (strstr(attribute_name, "HugePages_Free") != NULL) {
/* match HugePages_Free */
memInfo.HugePages_Free = value;
} else if (strstr(attribute_name, "AnonPages") != NULL) {
/* match AnonPages */
memInfo.AnonPages = value;
}
}
fclose(pFile);
if (_rmon_ctrl_ptr->per_node == 1) {
/* if set to 1 get the per NUMA node memory values */
memset(absolute_thresholds, 0, sizeof(absolute_thresholds));
avail = memInfo.MemFree + memInfo.FilePages + memInfo.SlabReclaimable;
memTotal = avail + memInfo.AnonPages;
/* NUMA node memory usage */
if (resource_config[index].percent == 1) {
memUsage = (double) memInfo.AnonPages / memTotal;
memUsage = memUsage * 100;
dlog("Memory Usage %s: %.2f%% \n", resource_name.c_str(), memUsage);
} else {
memUsage = (double) avail / MiB;
dlog("Memory Available %s: %.2f MB \n", resource_name.c_str(), memUsage);
}
/* initialize a new dynamic resource for the NUMA node if it does not already exist */
save_dynamic_mem_resource ( resource_name, criticality, memUsage, resource_config[index].percent,
absolute_thresholds, MEMORY_ALARM_ID );
}
if (HUGEPAGES_NODE == 1) {
/* huge pages memory usage for the NUMA node */
if (memInfo.HugePages_Total != 0){
if (resource_config[index].percent == 1){
memUsageHuge = (double) (memInfo.HugePages_Total - memInfo.HugePages_Free) / memInfo.HugePages_Total;
memUsageHuge = memUsageHuge * 100;
dlog("Memory Usage %s: %.2f%% \n", resource_name_huge.c_str(), memUsageHuge);
} else {
memUsageHuge = (double) memInfo.HugePages_Free * (memInfo.Hugepagesize/MiB) ;
dlog("Memory Available %s: %.2f MB \n", resource_name_huge.c_str(), memUsageHuge);
}
save_dynamic_mem_resource ( resource_name_huge, criticality, memUsageHuge, resource_config[index].percent,
absolute_thresholds, MEMORY_ALARM_ID );
}
}
resource_name_huge = "processor_hugepages_";
resource_name = "processor_";
}
}
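/*
 * Worked example (illustrative, hypothetical numbers): with strict accounting
 * (IS_STRICT == 1) and /proc/meminfo reporting Committed_AS = 6,000,000 kB
 * against a CommitLimit of 8,000,000 kB, the percent based value is
 * 6000000 / 8000000 * 100 = 75%. With overcommit accounting the value is
 * AnonPages / (MemFree + Buffers + Cached + SReclaimable + AnonPages) * 100;
 * e.g. AnonPages = 2,000,000 kB and avail = 6,000,000 kB gives
 * 2000000 / 8000000 * 100 = 25%. For absolute thresholds the remaining
 * amount is reported in MiB (kB / 1024) instead.
 */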
/*****************************************************************************
*
* Name : get_cpu_time
*
* Purpose : Parse per-cpu hi-resolution scheduling stats
*
*****************************************************************************/
int get_cpu_time( unsigned long long * cpu_time )
{
#define MAX_STRING_SIZE (19)
const char *sched_stat = "/proc/schedstat";
FILE * pFile;
char cpu_line[500];
unsigned long long value;
int version = 0;
int index = 0;
char cpu_time_len[50];
if ((pFile = fopen(sched_stat, "r")) == NULL){
dlog("failed to open: /proc/schedstat \n");
return (FAIL);
}
else {
/* Parse per-cpu hi-resolution scheduling stats */
while (memset(cpu_line, 0, sizeof(cpu_line)) && (fgets((char*) &cpu_line, sizeof(cpu_line), pFile) != NULL)) {
if (version != 15){
/* only version 15 is supported */
if (sscanf(cpu_line, "version %llu", &value) == 1) {
version = (int) value;
}
}
else if ((strstr(cpu_line, "cpu") != NULL) && (version == 15))
{
sscanf(cpu_line, "%*s %*s %*s %*s %*s %*s %*s %49s ",cpu_time_len);
if (((unsigned)strlen(cpu_time_len)) < MAX_STRING_SIZE) {
/* get the cpu time values for each cpu which is the 7th field */
sscanf(cpu_line, "%*s %*s %*s %*s %*s %*s %*s %llu ",&value);
cpu_time[index++] = value;
}
else {
elog("%s exceeded 2^64 for cpu stats cannot calculate cpu usage\n", cpu_time_len);
cpu_time[index++] = 0;
}
}
}
fclose(pFile);
}
return (PASS);
}
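/*
 * Illustrative /proc/schedstat (version 15) cpu line (values hypothetical):
 *
 *     cpu0 0 0 0 0 0 0 3766485384 2029774792 78348
 *
 * The 8th whitespace separated token (here 3766485384, the 7th counter after
 * the cpu name) is the per-cpu running time counter that get_cpu_time()
 * extracts; calculate_linux_usage() later takes the delta of this value
 * between two samples to derive occupancy. The string-length guard above
 * zeroes counters whose text is too long to parse safely.
 */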
/*****************************************************************************
*
* Name : cpu_monitoring_init
*
* Purpose : Get the base cpu list if running on a compute. Also get the number
* of cpus from: /proc/cpuinfo
*****************************************************************************/
void cpu_monitoring_init()
{
string base_cpu="";
FILE * pFile;
string delimiter = ",", delimiterTwo = "-";
size_t pos = 0;
string token;
char cpu_line[100];
const char *cpu_info = "/proc/cpuinfo";
char processor[20];
pFile = fopen (COMPUTE_RESERVED_CONF , "r");
if (pFile != NULL){
ilog("File %s is present\n", COMPUTE_RESERVED_CONF);
ifstream fin( COMPUTE_RESERVED_CONF );
string line;
while( getline( fin, line ) ) {
/* process each line */
if( line.find ("PLATFORM_CPU_LIST=") != string::npos ) {
stringstream ss( line );
getline( ss, base_cpu, '=' ); // token = string before =
getline( ss, base_cpu, '=' ); // token = string after =
ilog("Found PLATFORM_CPU_LIST set to %s in file %s\n", base_cpu.c_str(), COMPUTE_RESERVED_CONF);
}
}
fclose (pFile);
}
if (base_cpu.compare("") != 0)
{
/* get base cpus if they are available */
if ((pos = base_cpu.find(delimiter)) != string::npos) {
/* if the base cpus are listed with a comma, ex: 1,2 */
base_cpu = base_cpu + delimiter;
while ((pos = base_cpu.find(delimiter)) != string::npos) {
token = base_cpu.substr(0, pos);
included_cpu[num_base_cpus++] = atoi(token.c_str());
base_cpu.erase(0, pos + delimiter.length());
}
} else if ((pos = base_cpu.find(delimiterTwo)) != string::npos) {
/* if the base cpus are listed with a dash, ex: 1-3 */
base_cpu = base_cpu + delimiterTwo;
token = base_cpu.substr(0, pos);
int first_cpu = atoi(token.c_str());
base_cpu.erase(0, pos + delimiterTwo.length());
pos = base_cpu.find(delimiterTwo);
token = base_cpu.substr(0, pos);
int last_cpu = atoi(token.c_str());
/* loop through the inclusive range of base cpus */
num_base_cpus = 0;
for (int cpu = first_cpu; cpu <= last_cpu; cpu++){
included_cpu[num_base_cpus++] = cpu;
}
}
if (num_base_cpus == 0) {
/* only one base cpu available */
included_cpu[num_base_cpus++] = atoi(base_cpu.c_str());
}
}
ilog("Number of base CPUs for this node is %d \n", num_base_cpus);
/* get the number of cpus */
if ((pFile = fopen(cpu_info, "r")) == NULL){
wlog("failed to open: /proc/cpuinfo \n");
}
else {
/* Count the processor entries in /proc/cpuinfo */
while (memset(cpu_line, 0, sizeof(cpu_line)) && (fgets((char*) &cpu_line, sizeof(cpu_line), pFile) != NULL)) {
sscanf(cpu_line, "%19s %*s %*s", processor);
if (strcmp(processor, "processor") == 0) {
num_cpus++;
}
}
fclose(pFile);
}
ilog("Number of CPUs for this node is %d \n", num_cpus);
}
/*****************************************************************************
*
* Name : calculate_linux_usage
*
* Purpose : Calculate the cpu usage for Linux cards: controller, compute, storage
* The calculation runs as a delta. The first time the function is called no
* valid cpu calculation occurs. From the second time onwards, the cpu usage is
* calculated as the delta from the previous time the function was called.
*
*****************************************************************************/
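/*
 * For reference, the per-cpu occupancy computed below reduces to:
 *
 *   occupancy% = 100 * ((runtime_now_ns - runtime_prev_ns) / 1e6) / (1000 * delta_seconds)
 *
 * i.e. milliseconds of task runtime per elapsed second, expressed as a percentage.
 */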
int calculate_linux_usage( resource_config_type * ptr )
{
double delta_seconds;
unsigned long long cpu_occupancy[num_cpus];
unsigned long long cpu_delta_time;
unsigned long long total_avg_cpu = 0;
unsigned int counted_cpu=0;
int rc;
unsigned long long cpu_time[num_cpus];
if (cpu_time_initial.size() == 0) {
/* get the cpu time initially if the first cpu time does not exist */
rc = get_cpu_time( cpu_time );
/* get the first timestamp */
time(&t1);
if (rc != PASS)
{
wlog("Failed get_cpu_time \n");
return (FAIL);
}
for (int x=0; x<num_cpus; x++){
/* add the pointer value to a vector for permanent storage */
cpu_time_initial.push_back(cpu_time[x]);
dlog2("cpu_time_initial for Cpu%d set to %llu \n", x, cpu_time[x]);
}
ptr->resource_value = 0;
}
else {
/* get the later cpu time if the first cpu time exists */
rc = get_cpu_time( cpu_time );
if (rc != PASS)
{
wlog("Failed get_cpu_time \n");
return (FAIL);
}
/* get the later timestamp */
time(&t2);
for (int x=0; x<num_cpus; x++){
/* add the pointer value to a vector for permanent storage */
cpu_time_later.push_back(cpu_time[x]);
dlog2("cpu_time_later for Cpu%d %llu \n", x, cpu_time[x]);
}
delta_seconds = difftime(t2, t1);
if (num_base_cpus == 0)
{
/* this is a controller or storage node ; there are no vswitch or pinned cpus */
for (int j=0; j<num_cpus; j++)
{
/* calculate the cpu runtime for the specific cpu core */
dlog("Cpu%d delta_seconds: %f\n",j, delta_seconds);
dlog("Cpu%d later cycles: %llu , early cycles: %llu \n", j, cpu_time_later.at(j), cpu_time_initial.at(j));
cpu_delta_time = ((cpu_time_later.at(j) - cpu_time_initial.at(j)) / 1000000 /delta_seconds);
cpu_occupancy[j] = (100*(cpu_delta_time))/1000;
dlog("Cpu%d cpu_delta_time: %llu\n",j, cpu_delta_time);
dlog("Cpu%d cpu_occupancy: %llu\n",j, cpu_occupancy[j]);
total_avg_cpu += cpu_occupancy[j];
counted_cpu++;
}
} else {
/* this is a compute node, do not include vswitch or pinned cpus in calculation */
for (int j=0; j<num_base_cpus; j++)
{
/* calculate the cpu runtime for the specific cpu core */
dlog("Cpu%d delta_seconds: %f\n", included_cpu[j], delta_seconds);
dlog("Cpu%d later cycles: %llu , early cycles: %llu \n", included_cpu[j], cpu_time_later.at(included_cpu[j]), cpu_time_initial.at(included_cpu[j]));
cpu_delta_time = ((cpu_time_later.at(included_cpu[j]) - cpu_time_initial.at(included_cpu[j])) / 1000000 /delta_seconds);
cpu_occupancy[j] = (100*(cpu_delta_time))/1000;
dlog("Cpu%d cpu_delta_time: %llu\n", included_cpu[j], cpu_delta_time);
dlog("Cpu%d cpu_occupancy: %llu\n", included_cpu[j], cpu_occupancy[j]);
total_avg_cpu += cpu_occupancy[j];
counted_cpu++;
}
}
dlog("total_avg_cpu: %llu , counted_cpu: %d \n", total_avg_cpu, counted_cpu);
ptr->resource_value = (double) total_avg_cpu / counted_cpu;
/* clear the old cpu times and set the current times as the old times */
cpu_time_initial.clear();
for (int x=0; x<num_cpus; x++){
/* copy the current cpu times to the earlier ones */
cpu_time_initial.push_back(cpu_time_later.at(x));
}
/* clear the current cpu times as they will be updated the next time around */
cpu_time_later.clear();
/* set the previous time as the current time */
t1 = t2;
#define LINUX_CPU_LOG_VALUE_STEP (10)
if ( log_value ( ptr->resource_value,
ptr->resource_prev,
LINUX_CPU_LOG_VALUE_STEP ))
{
plog("%s: %.2f%% (average)\n", ptr->resource, ptr->resource_value);
}
}
return (PASS);
}
/* Read the node UUID from the: /etc/platform/platform.conf file */
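/* The file is expected to contain a line of the form (value below is hypothetical):
 *   UUID=4ab91b3c-95f4-4b27-9a42-3a9d7c1f8d10
 * The text after the '=' is stored in hostUUID. */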
void _readUUID ()
{
FILE * pFile;
const char *platformFile = "/etc/platform/platform.conf";
pFile = fopen (platformFile , "r");
if (pFile != NULL) {
ifstream fin( platformFile );
string line;
while( getline( fin, line ) ) {
/* process each line */
if( line.find ("UUID=") != string::npos ) {
stringstream ss( line );
getline( ss, hostUUID, '=' ); // token = string before =
getline( ss, hostUUID, '=' ); // token = string after =
}
}
fclose (pFile);
}
}
/*****************************************************************************
*
* Name : _load_rmon_interfaces
*
* Purpose : Update the monitored network interfaces from the:
*           /etc/platform/interfaces file
*****************************************************************************/
void _load_rmon_interfaces ()
{
rmon_socket_type * sock_ptr = rmon_getSock_ptr ();
/* initialize interface monitoring */
for ( int j = 0 ; j < _rmon_ctrl_ptr->interface_resources; j++ )
{
init_physical_interfaces ( &interface_resource_config[j] );
}
for (int i=0; i<_rmon_ctrl_ptr->interface_resources; i++)
{
if ( interface_resource_config[i].interface_used == true )
{
/* set the link state for all the primary physical interfaces */
if ( get_link_state ( sock_ptr->ioctl_sock, interface_resource_config[i].interface_one, &interface_resource_config[i].link_up_and_running ) )
{
interface_resource_config[i].link_up_and_running = false ;
interface_resource_config[i].resource_value = INTERFACE_DOWN;
wlog ("Failed to query %s operational state ; defaulting to down\n", interface_resource_config[i].interface_one) ;
}
else
{
ilog ("%s link is: %s\n", interface_resource_config[i].interface_one, interface_resource_config[i].link_up_and_running ? "Up" : "Down" );
if (interface_resource_config[i].link_up_and_running)
{
interface_resource_config[i].resource_value = INTERFACE_UP;
}
else
{
interface_resource_config[i].resource_value = INTERFACE_DOWN;
interface_resource_config[i].failed = true;
}
}
if (interface_resource_config[i].lagged == true)
{
/* set the link state for all the lagged physical interfaces */
if ( get_link_state ( sock_ptr->ioctl_sock, interface_resource_config[i].interface_two, &interface_resource_config[i].link_up_and_running ) )
{
interface_resource_config[i].link_up_and_running = false ;
wlog ("Failed to query %s operational state ; defaulting to down\n", interface_resource_config[i].interface_two) ;
}
else
{
ilog ("%s link is: %s\n", interface_resource_config[i].interface_two, interface_resource_config[i].link_up_and_running ? "Up" : "Down" );
if (interface_resource_config[i].link_up_and_running)
{
interface_resource_config[i].resource_value_lagged = INTERFACE_UP;
}
else
{
interface_resource_config[i].resource_value_lagged = INTERFACE_DOWN;
interface_resource_config[i].failed = true;
}
}
}
}
}
for ( int j = 0 ; j < _rmon_ctrl_ptr->interface_resources; j++ )
{
interface_alarming_init ( &interface_resource_config[j] );
}
}
/*****************************************************************************
*
* Name : resource_stall_monitor
*
* Purpose : Detects stalls in the resource monitoring threads
******************************************************************************/
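/*
 * Note: /proc/<pid>/task/<tid>/schedstat exposes three per-thread values from
 * the kernel scheduler stats: cumulative time on cpu, cumulative time waiting
 * on a runqueue, and the number of timeslices run. The third value is read
 * below as nr_switches_count ; if it stops incrementing between audits the
 * thread is considered stalled.
 */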
int resource_stall_monitor ( resource_config_type * ptr, pid_t tid, pid_t pid)
{
#define MAX_SCHEDSTAT_LEN (128)
char file_path [MAX_FILENAME_LEN] ;
char schedstat [MAX_SCHEDSTAT_LEN] ;
FILE * fp ;
int rc = PASS;
unsigned long long nr_switches_old = t_data.nr_switches_count;
snprintf ( &file_path[0], MAX_FILENAME_LEN, "/proc/%d/task/%d/schedstat", pid, tid );
fp = fopen (file_path, "r" );
if ( fp )
{
/* check to see if the thread is stalled */
memset ( schedstat, 0 , MAX_SCHEDSTAT_LEN );
if ( fgets ( &schedstat[0], MAX_SCHEDSTAT_LEN, fp) != NULL)
{
if ( sscanf ( schedstat, "%*s %*s %llu", &t_data.nr_switches_count) >= 1 )
{
dlog ("%s: nr_count: %llu, nr_count_old: %llu \n", ptr->resource, t_data.nr_switches_count, nr_switches_old);
if ((nr_switches_old != t_data.nr_switches_count) && (ptr->failed))
{
/* Clear the stall monitor alarm */
ilog("%s thread has unstalled \n", ptr->resource);
ptr->sev = SEVERITY_CLEARED;
t_data.nr_switches_count = 0;
resourceStageChange ( ptr, RMON_STAGE__FINISH );
}
}
else
{
wlog ("Failed to get schedstat from (%s)\n", file_path);
rc = FAIL;
}
}
else
{
wlog ("failed to read from (%s)\n", file_path );
rc = FAIL;
}
fclose(fp);
}
else
{
wlog ("Failed to open (%s)\n", file_path);
rc = FAIL;
}
if ((((nr_switches_old == t_data.nr_switches_count) && (ptr->sev != SEVERITY_MAJOR))) ||
(rc == FAIL))
{
/* thread has stalled raise alarm */
elog("%s thread has stalled \n", ptr->resource);
ptr->sev = SEVERITY_MAJOR;
ptr->failed = true;
resourceStageChange ( ptr, RMON_STAGE__MANAGE );
}
return rc;
}
/*****************************************************************************
*
* Name : check_instance_file
*
* Purpose : Thread spawned by rmon to check if: /etc/nova/instances is mounted.
* It needs to be a thread because of NFS hang issues.
*
*****************************************************************************/
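/*
 * The check is two staged: /proc/mounts is first scanned for an
 * /etc/nova/instances entry, then a small test file (.rmon_test) is written
 * and removed there to confirm the (typically NFS backed) mount is actually
 * responsive and not just listed.
 */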
void *check_instance_file(void *threadarg)
{
struct thread_data *res_data;
FILE * pFile;
FILE *testFile;
string line;
struct stat p;
const char *instances_dir = "/etc/nova/instances";
const char *test_file = "/etc/nova/instances/.rmon_test";
res_data = (struct thread_data *) threadarg;
pthread_mutex_lock(&lock);
res_data->thread_running = true;
res_data->tid = syscall(SYS_gettid);
pthread_mutex_unlock(&lock);
dlog("%s process id: %d, thread id: %d \n", res_data->resource->resource, res_data->pid, res_data->tid);
res_data->resource_usage = NOT_MOUNTED;
pFile = fopen (MOUNTS_DIR , "r");
/* query /proc/mounts and make sure the /etc/nova/instances file system is there */
if (pFile != NULL)
{
ifstream fin( MOUNTS_DIR );
while( getline( fin, line ) )
{
/* process each line */
if( line.find (instances_dir) != string::npos )
{
/* the mount is present */
res_data->resource_usage = MOUNTED;
break;
}
}
fclose (pFile);
}
if ( res_data->resource_usage == MOUNTED )
{
/* put the test file in and check that it is accessible */
testFile = fopen(test_file, "w");
if (testFile != NULL)
{
fclose (testFile);
if( remove( test_file ) != 0 )
{
elog("Failure in removing rmond test file: %s \n", test_file);
}
}
else
{
res_data->resource_usage = NOT_MOUNTED;
}
}
if (res_data->resource_usage == NOT_MOUNTED)
{
/* fail the resource */
stat (COMPUTE_CONFIG_PASS, &p);
if ((p.st_ino != 0 ) || (p.st_dev != 0))
{
pthread_mutex_lock(&lock);
if (res_data->resource->sev != SEVERITY_MAJOR)
{
res_data->resource->sev = SEVERITY_MAJOR;
res_data->resource->failed = true;
resourceStageChange ( res_data->resource, RMON_STAGE__MANAGE );
}
pthread_mutex_unlock(&lock);
}
}
else if ((res_data->resource_usage == MOUNTED) && (res_data->resource->failed))
{
pthread_mutex_lock(&lock);
res_data->resource->sev = SEVERITY_CLEARED;
resourceStageChange ( res_data->resource, RMON_STAGE__FINISH );
pthread_mutex_unlock(&lock);
}
pthread_mutex_lock(&lock);
res_data->thread_running = false;
pthread_mutex_unlock(&lock);
pthread_exit(NULL);
}
/*****************************************************************************
*
* Name : postPMs
*
* Purpose : create samples for each resource in Ceilometer
*
*****************************************************************************/
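/*
 * Each sample is tagged with the platform hostname as metadata, e.g.
 * {"host":"controller-0"} (hostname shown is illustrative). platform.cpu.util
 * is always posted as a '%' delta ; platform.mem.util and platform.fs.util are
 * posted as a '%' delta or an 'MB' gauge depending on whether percentage or
 * absolute thresholds are configured.
 */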
int _postPMs ()
{
char meta_data[MAX_LEN];
if ( hostUUID.empty() )
{
/* keep trying to get the host UUID if it is not present */
_readUUID();
}
if ( !hostUUID.empty() )
{
// indicate the platform hostname as metadata for all resources
char *hoststring = strdup(_rmon_ctrl_ptr->my_hostname);
if (hoststring) {
char *host = strtok(hoststring,"=");
host = strtok(NULL, "=");
snprintf(&meta_data[0], MAX_LEN, "{\"host\":\"%s\"}", host);
free(hoststring);
}
for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ )
{
ostringstream strs;
strs << resource_config[i].resource_value ;
string res_val = strs.str();
if (strcmp(resource_config[i].resource, CPU_RESOURCE_NAME) == 0) {
/* cpu resource pm */
generate_ceilometer_pm ( hostUUID, "platform.cpu.util", "delta", "%",
res_val, string(meta_data) );
}
else if (strcmp(resource_config[i].resource, MEMORY_RESOURCE_NAME) == 0) {
/* memory resource pm */
if (resource_config[i].percent == 1) {
generate_ceilometer_pm ( hostUUID, "platform.mem.util", "delta", "%",
res_val, string(meta_data) );
} else {
generate_ceilometer_pm ( hostUUID, "platform.mem.util", "gauge", "MB",
res_val, string(meta_data) );
}
}
else if (strcmp(resource_config[i].resource, FS_RESOURCE_NAME) == 0) {
/* filesystem resource pm */
if (resource_config[i].percent == 1) {
generate_ceilometer_pm ( hostUUID, "platform.fs.util", "delta", "%",
res_val, string(meta_data) );
} else {
generate_ceilometer_pm ( hostUUID, "platform.fs.util", "gauge", "MB",
res_val, string(meta_data) );
}
}
} // end of resource loop
}
return (PASS);
}
/*****************************************************************************
*
* Name : _get_events
*
* Purpose : query each resource and extract the required usage values
*
*****************************************************************************/
extern bool is_cpe ( void );
extern bool is_worker ( void );
void _get_events (void)
{
int rc;
string v_cpu;
FILE * pFile;
if ( _rmon_ctrl_ptr->clients == 0 )
{
wlog ("Monitoring with no registered clients\n");
}
for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ )
{
const char *resource = resource_config[i].resource;
ilog_throttled ( resource_config[i].resource_monitor_throttle, 120,
"Monitoring '%s'\n",
resource );
if (strcmp(resource, CPU_RESOURCE_NAME) == 0)
{
/* linux cards: controller, compute and storage cpu utilization */
rc = calculate_linux_usage( &resource_config[i] );
if ( rc == PASS )
{
/* determine if the resource has failed so it can be handled by the resource handler */
process_failures ( &resource_config[i]);
}
}
else if (!strcmp(resource, V_CPU_RESOURCE_NAME) ||
!strcmp(resource, V_MEMORY_RESOURCE_NAME) ||
!strcmp(resource, V_PORT_RESOURCE_NAME) ||
!strcmp(resource, V_INTERFACE_RESOURCE_NAME) ||
!strcmp(resource, V_LACP_INTERFACE_RESOURCE_NAME) ||
!strcmp(resource, V_OVSDB_RESOURCE_NAME) ||
!strcmp(resource, V_NETWORK_RESOURCE_NAME) ||
!strcmp(resource, V_OPENFLOW_RESOURCE_NAME))
{
/* ensure that configuration has completed before computing
* vswitch resource utilization */
if ( !daemon_is_file_present ( CONFIG_COMPLETE_WORKER ) )
continue ;
pFile = fopen (COMPUTE_VSWITCH_DIR , "r");
if (pFile != NULL){
fclose (pFile);
}
else
{
wlog ("%s failed to open %s\n", resource, COMPUTE_VSWITCH_DIR);
}
}
else if (strstr(resource_config[i].resource, V_MEMORY_RESOURCE_NAME) != NULL)
{
/* vswitch memory with specific sockets */
/* skip these ones as they are already taken care of above */
}
else if(strcmp(resource, REMOTE_LOGGING_RESOURCE_NAME) == 0)
{
rmonHdlr_remotelogging_query(&resource_config[i]);
}
else if (strcmp(resource, INSTANCE_RESOURCE_NAME) == 0)
{
/* do not perform this check if we are not on a compute node.
* it is not valid on a storage or combo load */
if ( !is_worker () )
continue ;
if ( !daemon_is_file_present ( CONFIG_COMPLETE_WORKER ) )
continue ;
/* nova instances mount check */
pFile = fopen (COMPUTE_VSWITCH_DIR , "r");
if (pFile != NULL)
{
rc = PASS ;
pthread_mutex_lock(&lock);
if (!t_data.thread_running)
{
pthread_attr_t attr ;
t_data.resource = &resource_config[i];
pthread_attr_init (&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
/* launch a thread to monitor the /etc/nova/instances mount */
rc = pthread_create(&thread, &attr, check_instance_file, (void *) &t_data);
if (rc)
{
elog("%s ERROR; return code from pthread_create() is %d\n",
resource, rc);
}
pthread_attr_destroy (&attr);
}
else
{
/* If thread is still running check that it is not stalled */
resource_stall_monitor(&resource_config[i], t_data.tid, t_data.pid);
}
pthread_mutex_unlock(&lock);
fclose (pFile);
}
}
else if (strcmp(resource, MEMORY_RESOURCE_NAME) == 0) {
/* memory utilization */
calculate_memory_usage(i);
/* determine if the resource has failed so it can be handled by the resource handler */
if (resource_config[i].percent == PERCENT_USED) {
process_failures ( &resource_config[i]);
} else {
process_failures_absolute ( &resource_config[i]);
}
}
else if ((strcmp(resource, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) &&
(resource_config[i].alarm_status == ALARM_ON)) {
/* virtual thin pool space utilization */
rc = calculate_virtual_space_usage(i, V_CINDER_THINPOOL_RESOURCE_NAME);
/* only check resource for fail and clear if it is active */
if (rc == PASS) {
if (resource_config[i].percent == PERCENT_USED) {
/* determine if the resource has failed so it can be handled by the resource handler */
process_failures (&resource_config[i]);
} else {
process_failures_absolute (&resource_config[i]);
}
}
}
else if ((strcmp(resource, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) &&
(resource_config[i].alarm_status == ALARM_ON)){
/* do not perform this check unless we are on a worker or combo (CPE) node ;
 * it is not valid on other loads */
if ( !is_worker () && !is_cpe () )
continue ;
if ( !daemon_is_file_present ( CONFIG_COMPLETE_WORKER ) )
continue ;
/* virtual thin pool space utilization */
rc = calculate_virtual_space_usage(i, V_NOVA_THINPOOL_RESOURCE_NAME);
/* only check resource for fail and clear if it is active */
if (rc == PASS) {
if (resource_config[i].percent == PERCENT_USED) {
/* determine if the resource has failed so it can be handled by the resource handler */
process_failures (&resource_config[i]);
} else {
process_failures_absolute (&resource_config[i]);
}
}
}
else if (strcmp(resource, FS_RESOURCE_NAME) == 0) {
/* file system utilization */
/* do nothing ; individual file systems are monitored separately rather than the total */
}
else {
/* dynamic file system resource */
pthread_mutex_lock(&lock);
if ((resource_config[i].alarm_status == ALARM_ON) && (modifyingResources == false))
{
/* only calculate the resource usage if file systems aren't being added */
calculate_fs_usage( &resource_config[i] );
/* only check resource for fail and clear if it is active */
if (resource_config[i].percent == PERCENT_USED) {
/* determine if the resource has failed so it can be handled by the resource handler */
process_failures ( &resource_config[i]);
} else {
process_failures_absolute ( &resource_config[i]);
}
}
else if ((resource_config[i].alarm_status == ALARM_OFF) && (modifyingResources == false)
&& (resource_config[i].failed == true))
{
//send a clear message
send_clear_msg(i);
// we need to clear the resource's alarm if there was any set for this resource
clear_alarm_for_resource(&resource_config[i]);
}
pthread_mutex_unlock(&lock);
}
} // end of rmon resources
/*
* since interface resources are event based resources, i.e.
* they are only serviced when the netlink socket reports a
* link state event, we need to run a periodic audit on them
* as part of the RMON event audit.
* This audit resends interface degrade statuses to maintenance
* if the interface is in a failed state.
*/
for ( int j = 0; j < _rmon_ctrl_ptr->interface_resources; j++ )
{
if ( interface_resource_config[j].interface_used &&
interface_resource_config[j].failed == true )
{
send_interface_msg ( &interface_resource_config[j],
_rmon_ctrl_ptr->clients );
}
}
}
int kill_running_process ( int pid )
{
int result = kill ( pid, 0 );
if ( result == 0 )
{
result = kill ( pid, SIGKILL );
if ( result == 0 )
{
wlog ("NTP process kill succeeded (%d)\n", pid );
}
else
{
elog ("NTP process kill failed (%d)\n", pid );
}
}
return (PASS);
}
/* SIGCHLD handler support - for waitpid */
static bool rmon_sigchld_received = false ;
void daemon_sigchld_hdlr ( void )
{
dlog("Received SIGCHLD ...\n");
int status = 0;
pid_t tpid = 0;
while ( 0 < ( tpid = waitpid ( -1, &status, WNOHANG | WUNTRACED )))
{
dlog("NTP query script returned WIFEXITED:%d and WEXITSTATUS:%d for pid:%d\n", WIFEXITED(status), WEXITSTATUS(status), tpid);
if (tpid == ntp_child_pid)
{
rmon_sigchld_received = true ;
/* no need to wait for a timeout since we got a response, force a ring */
rmonTimer_ntp.ring = true;
ntp_status = WEXITSTATUS(status);
}
else
{
dlog ("PID:%d lookup failed ; reaped likely after timeout\n", tpid );
ntp_status = NTP_ERROR;
}
}
}
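/*
 * NTP audit FSM overview: NTP_STAGE__BEGIN starts the audit period timer ;
 * NTP_STAGE__EXECUTE_NTPQ runs the ntpq query when that timer rings and arms
 * the command timeout timer ; NTP_STAGE__EXECUTE_NTPQ_WAIT processes the
 * result (or kills the child on timeout) and cycles back to BEGIN.
 */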
int ntp_audit_handler ( )
{
if ( ntp_stage >= NTP_STAGE__STAGES )
{
wlog ("Invalid ntp_stage (%d) ; correcting\n", ntp_stage );
ntpStageChange ( NTP_STAGE__BEGIN);
}
switch ( ntp_stage )
{
// First state
case NTP_STAGE__BEGIN:
{
mtcTimer_start ( rmonTimer_ntp, rmon_timer_handler, _rmon_ctrl_ptr->ntp_audit_period );
dlog ("Start NTP period timer (%d secs) %p\n", _rmon_ctrl_ptr->ntp_audit_period, rmonTimer_ntp.tid);
ntpStageChange ( NTP_STAGE__EXECUTE_NTPQ );
break ;
}
// Execute the ntpq command
case NTP_STAGE__EXECUTE_NTPQ:
{
if ( rmonTimer_ntp.ring == true ) //wake up from NTP period
{
ntp_status = PASS;
mtcTimer_start ( rmonTimer_ntp, rmon_timer_handler, _rmon_ctrl_ptr->ntpq_cmd_timeout );
dlog ("Start NTPQ command timer (%d secs) %p\n", _rmon_ctrl_ptr->ntpq_cmd_timeout, rmonTimer_ntp.tid);
// Execute the ntpq command
int rc = query_ntp_servers();
if (rc != PASS)
{
elog ("NTP execute_status_command returned a failure (%d)\n", rc);
ntp_status = NTP_ERROR;
}
ntpStageChange ( NTP_STAGE__EXECUTE_NTPQ_WAIT );
}
break ;
}
// Wait for the ntpq command to finish and process results
case NTP_STAGE__EXECUTE_NTPQ_WAIT:
{
// Give the command time to execute. The daemon_sigchld_hdlr will force
// a ring when the command executes successfully or returns a failure
if ( ( rmonTimer_ntp.ring == true) || (ntp_status == NTP_ERROR ) )
{
// Stop the NTP timer if still running
if ( rmonTimer_ntp.tid )
{
mtcTimer_stop ( rmonTimer_ntp );
}
if (( !rmon_sigchld_received) || (ntp_status == NTP_ERROR))
{
if ( rmon_sigchld_received == false )
{
elog ("NTPQ command execution timed out (pid:%d)\n", ntp_child_pid );
}
elog ("NTPQ returned an execution failure (rc:%d) (pid:%d)\n", ntp_status, ntp_child_pid);
if (ntp_child_pid != 0)
{
kill_running_process ( ntp_child_pid );
}
}
else
{
dlog ("NTPQ command was successful ; analyzing results\n");
ntp_query_results(ntp_status);
}
ntpStageChange ( NTP_STAGE__BEGIN );
ntp_child_pid = 0;
rmon_sigchld_received = false;
}
break;
}
default:
{
elog ("NTP invalid ntp_stage (%d)\n", ntp_stage );
/* Default to first state for the invalid case. If there is an issue then it will be detected */
ntpStageChange ( NTP_STAGE__BEGIN );
}
}
return (PASS);
}
/*****************************************************************************
*
* Name : rmon_service
*
* Purpose : main loop for monitoring resources
*
*****************************************************************************/
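/*
 * Main loop outline: select() on the rmon tx and netlink sockets with a short
 * timeout ; service client registrations and run _get_events() when the event
 * timer rings ; refresh the token and post PMs when the PM timer rings ; then
 * drive the FSMs for failed resources, failed interfaces and thinpool metadata
 * audits before servicing the FM interface and pending signals.
 */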
void rmon_service (rmon_ctrl_type * ctrl_ptr)
{
fd_set readfds;
struct timeval waitd;
std::list<int> socks;
rmon_socket_type * sock_ptr = rmon_getSock_ptr ();
/* initialize FM handler */
rmon_fm_init();
/* ignore SIGPIPE on swacts */
signal(SIGPIPE, SIG_IGN);
/* initialize the memory accounting: either Strict or OOM */
init_memory_accounting();
/* initialize the cpu monitoring defaults */
cpu_monitoring_init();
_readUUID();
/* Start an event timer for the interval of the resources being monitored */
ilog ("Starting 'Event Monitor' timer (%d secs) \n", ctrl_ptr->audit_period);
mtcTimer_start ( rmonTimer_event, rmon_timer_handler, 1 );
ilog ("Starting 'PM Monitor' timer (%d secs) \n", ctrl_ptr->pm_period);
mtcTimer_start ( rmonTimer_pm, rmon_timer_handler,ctrl_ptr->pm_period);
if (is_controller())
{
ntp_stage = NTP_STAGE__BEGIN;
}
/* Get an Authentication Token */
ilog ("%s Requesting initial token\n", ctrl_ptr->my_hostname );
tokenEvent.status = tokenUtil_new_token ( tokenEvent, ctrl_ptr->my_hostname );
if ( tokenEvent.status != PASS )
{
elog ("Failed to get authentication token (%d)\n", tokenEvent.status);
if ( tokenEvent.base )
{
slog ("%s token base:%p\n",
ctrl_ptr->my_hostname,
tokenEvent.base);
}
}
/* service all the register and deregister requests in the queue */
rmon_alive_notification( _rmon_ctrl_ptr->clients );
ilog ("registered clients: %d\n", _rmon_ctrl_ptr->clients);
#ifdef WANT_FS_MONITORING
/* Initialize the resource specific configuration */
for (int j=0; j<_rmon_ctrl_ptr->resources; j++)
{
if ( strcmp(resource_config[j].resource, FS_RESOURCE_NAME) == 0 ) {
/* determine whether percent or absolute values are used */
/* determine if virtual thin pool memory usage alarm should be on or off */
fs_percent = resource_config[j].percent;
}
}
/* add the static filesystem resources */
process_static_fs_file();
/* initialize the resource alarms */
for (int j=0; j<_rmon_ctrl_ptr->resources; j++)
{
rmon_alarming_init ( &resource_config[j] );
}
/* add any dynamic resources from before */
add_dynamic_fs_resource(false);
#else
ilog("static filesystem monitoring moved to collectd\n");
#endif
/* Clear any stale dynamic alarms that can be caused by dynamic resources. */
/* An alarm becomes stale, for example, if it was raised against a local volume group (lvg) and */
/* later on the lvg is deleted. The node will come up, the lvg resource will no longer exist and */
/* its related alarms will not be refreshed. Dynamic alarms are any alarms whose resource can be */
/* provisioned. */
AlarmFilter alarmFilter;
unsigned int max_alarms=75;
char alarm_to_search[FM_MAX_BUFFER_LENGTH];
fm_alarm_id alarm_id;
snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, FS_ALARM_ID);
SFmAlarmDataT *active_alarms = (SFmAlarmDataT*) calloc (max_alarms, sizeof (SFmAlarmDataT));
if (active_alarms != NULL)
{
/* get all the current alarms with id of FS_ALARM_ID which are alarms related to the file system */
/* fm_get_faults_by_id returns the number of alarms found */
if (fm_get_faults_by_id( &alarm_id, active_alarms, &max_alarms) == FM_ERR_OK)
{
bool found = false;
for ( unsigned int i = 0; i < max_alarms; i++ )
{
/* only get the 100.104 alarms */
if ((strncmp((active_alarms+i)->alarm_id, FS_ALARM_ID, sizeof((active_alarms+i)->alarm_id)) == 0)
&& (strstr((active_alarms+i)->entity_instance_id, _rmon_ctrl_ptr->my_hostname) != NULL) )
{
found = false;
for (int j=0; j<_rmon_ctrl_ptr->resources; j++)
{
/* since we build the entity_instance_id with multiple data we must recreate it */
snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.volumegroup=%s", _rmon_ctrl_ptr->my_hostname, resource_config[j].resource);
if (strncmp(alarm_to_search, (active_alarms+i)->entity_instance_id, sizeof(alarm_to_search)) == 0)
{
found = true;
}
snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, resource_config[j].resource);
if (strncmp(alarm_to_search, (active_alarms+i)->entity_instance_id, sizeof(alarm_to_search)) == 0)
{
found = true;
}
// We found the resource but let's check if alarming is enabled for it ;
// if it is not then we want the stale alarm cleared, so drop 'found'
if (found)
{
if (resource_config[j].alarm_status == ALARM_OFF)
{
found = false;
}
break;
}
}
if (!found)
{
/* the alarm did not match any current resources so let's clear it */
snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, (active_alarms+i)->alarm_id );
snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, (active_alarms+i)->entity_instance_id);
ilog ("Clearing stale alarm %s for entity instance id: %s", (active_alarms+i)->alarm_id, (active_alarms+i)->entity_instance_id);
if (rmon_fm_clear(&alarmFilter) != FM_ERR_OK)
{
wlog ("Failed to clear stale alarm for entity instance id: %s", (active_alarms+i)->entity_instance_id);
}
}
}
}
}
free(active_alarms);
}
else
{
elog ("Failed to allocate memory for clearing stale dynamic alarms");
}
if (( sock_ptr->ioctl_sock = open_ioctl_socket ( )) <= 0 )
{
elog ("Failed to create ioctl socket");
}
/* Not monitoring address changes RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR */
if (( sock_ptr->netlink_sock = open_netlink_socket ( RTMGRP_LINK )) <= 0 )
{
elog ("Failed to create netlink listener socket");
}
/* load the current interfaces for monitoring */
_load_rmon_interfaces();
socks.clear();
socks.push_front (sock_ptr->rmon_tx_sock);
socks.push_front (sock_ptr->netlink_sock);
socks.sort();
for (;;) {
/* Accommodate hup reconfig */
FD_ZERO(&readfds);
FD_SET(sock_ptr->rmon_tx_sock, &readfds);
FD_SET(sock_ptr->netlink_sock, &readfds);
waitd.tv_sec = 0;
waitd.tv_usec = SOCKET_WAIT ;
tokenUtil_log_refresh ();
/* This is used as a delay up to select timeout ; SOCKET_WAIT */
select( socks.back()+1, &readfds, NULL, NULL, &waitd);
if (FD_ISSET(sock_ptr->rmon_tx_sock, &readfds))
{
_rmon_ctrl_ptr->clients = rmon_service_inbox ( _rmon_ctrl_ptr->clients );
}
else if (FD_ISSET(sock_ptr->netlink_sock, &readfds))
{
dlog ("netlink socket fired\n");
if ( service_interface_events ( sock_ptr->netlink_sock, sock_ptr->ioctl_sock ) != PASS )
{
elog ("service_interface_events failed \n");
}
}
/* Manage the health of the resources */
if ( rmonTimer_event.ring == true )
{
// restart the audit period timer
mtcTimer_start ( rmonTimer_event, rmon_timer_handler, ctrl_ptr->audit_period );
/* service all the register and deregister requests in the queue */
rmon_alive_notification( _rmon_ctrl_ptr->clients );
_get_events ( );
}
if ( rmonTimer_pm.ring == true )
{
mtcTimer_start ( rmonTimer_pm, rmon_timer_handler, ctrl_ptr->pm_period );
tokenUtil_token_refresh ( tokenEvent, ctrl_ptr->my_hostname );
_postPMs();
}
/* loop through all the resource timers waiting for a ring */
for ( int j = 0 ; j < ctrl_ptr->resources ; j++ )
{
if (resource_config[j].failed == true) {
/* Run the FSM for this failed resource */
resource_handler ( &resource_config[j]);
}
}
/* loop through all the interface resources */
for ( int j = 0 ; j < ctrl_ptr->interface_resources ; j++ )
{
if (interface_resource_config[j].failed == true) {
/* Run the FSM for this failed interface */
interface_handler ( &interface_resource_config[j] );
}
}
/* loop through all the LVM thinpool metadata resources waiting for a ring */
for ( int j = 0; j < ctrl_ptr->thinmeta_resources; j++ )
{
if (thinmeta_resource_config[j].critical_threshold) {
// a threshold of 0 disables monitoring
if (thinmetatimer[j].ring == true) {
// restart the audit period timer
mtcTimer_start ( thinmetatimer[j], rmon_timer_handler,
thinmeta_resource_config[j].audit_period );
dlog("%s/%s running audit (resource index: %i)",
thinmeta_resource_config[j].vg_name,
thinmeta_resource_config[j].thinpool_name, j)
/* Handle resource */
int k;
for (k = THINMETA_FSM_RETRY; k > 0; k--) {
// call again the FSM in case it instructs us to RETRY
if(thinmeta_handler(&thinmeta_resource_config[j]) != RETRY) {
break;
}
}
if (k == 0) {
dlog("%s/%s too many state changes in FSM at: %i stage!",
thinmeta_resource_config[j].vg_name,
thinmeta_resource_config[j].thinpool_name,
thinmeta_resource_config[j].stage);
}
}
}
}
/* handle RMON FM interface */
rmon_fm_handler ();
daemon_signal_hdlr ();
}
}
/****************************************************************************
*
* Name : log_value
*
* Purpose : Log resource state values while avoiding log flooding for
*           trivial fluctuations.
*
* Description: Recommends whether the current resource state value should
* be logged based on current, previous and step values.
*
* The caller should not generate such a log if a false is returned.
*
* A true is returned if the current and previous resource values differ
* by +/- the step amount.
*
* The caller specifies the step, which can be overridden by a smaller
* rmond.conf:log_step value.
*
* If step is zero then a true is always returned in support of a debug mode
* where we get the current reading as a log on every audit.
*
* The caller's previous value is updated to current whenever true is returned.
*
****************************************************************************/
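/*
 * Worked example (hypothetical values, assuming rmond.conf:log_step does not
 * lower the step): with step=10, previous=45.0 and current=57.3,
 * round(57.3)=57 >= round(45.0)+10=55, so true is returned and previous is
 * updated to 57.3 ; a subsequent reading of 60.1 would then return false.
 */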
bool log_value ( double & current, double & previous, int step )
{
/* Support step override for debug purposes
* Allows for more frequent logging */
int _step = daemon_get_cfg_ptr()->log_step ;
/* a lower value from the conf file takes precedence */
if ( _step > step )
_step = step ;
if (( round(current) >= ( round(previous) + _step )) ||
( round(current) <= ( round(previous) - _step )))
{
previous = current ;
return true ;
}
return false ;
}